From 8b4c8b5793d73177ea9ed652801717707056f33e Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 15 Nov 2023 00:31:02 +0530 Subject: [PATCH 1/8] Add types to codebase --- .github/workflows/test.yml | 3 ++ frontmatter/__init__.py | 77 +++++++++++++++++++--------- frontmatter/conftest.py | 8 --- frontmatter/default_handlers.py | 91 ++++++++++++++++++++------------- frontmatter/util.py | 11 ++-- mypy.ini | 3 ++ setup.py | 5 +- 7 files changed, 126 insertions(+), 72 deletions(-) delete mode 100644 frontmatter/conftest.py create mode 100644 mypy.ini diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 63248b2..d053520 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,3 +27,6 @@ jobs: - name: Run tests run: | pytest . --doctest-modules --doctest-glob "README.md" + - name: Run type checking + run: | + mypy . diff --git a/frontmatter/__init__.py b/frontmatter/__init__.py index b9efb5d..2ec44e8 100644 --- a/frontmatter/__init__.py +++ b/frontmatter/__init__.py @@ -2,15 +2,21 @@ """ Python Frontmatter: Parse and manage posts with YAML frontmatter """ +from __future__ import annotations import codecs -import re - +import io +import os +from typing import TYPE_CHECKING, Iterable, cast from .util import u from .default_handlers import YAMLHandler, JSONHandler, TOMLHandler +if TYPE_CHECKING: + from .default_handlers import BaseHandler + + __all__ = ["parse", "load", "loads", "dump", "dumps"] @@ -22,7 +28,7 @@ ] -def detect_format(text, handlers): +def detect_format(text: str, handlers: Iterable[BaseHandler]) -> BaseHandler | None: """ Figure out which handler to use, based on metadata. Returns a handler instance or None. @@ -40,7 +46,12 @@ def detect_format(text, handlers): return None -def parse(text, encoding="utf-8", handler=None, **defaults): +def parse( + text: str, + encoding: str = "utf-8", + handler: BaseHandler | None = None, + **defaults: object, +) -> tuple[dict[str, object], str]: """ Parse text with frontmatter, return metadata and content. Pass in optional metadata defaults as keyword args. @@ -79,14 +90,14 @@ def parse(text, encoding="utf-8", handler=None, **defaults): return metadata, text # parse, now that we have frontmatter - fm = handler.load(fm) - if isinstance(fm, dict): - metadata.update(fm) + fm_data = handler.load(fm) + if isinstance(fm_data, dict): + metadata.update(fm_data) return metadata, content.strip() -def check(fd, encoding="utf-8"): +def check(fd: str | io.IOBase, encoding: str = "utf-8") -> bool: """ Check if a file-like object or filename has a frontmatter, return True if exists, False otherwise. @@ -109,7 +120,7 @@ def check(fd, encoding="utf-8"): return checks(text, encoding) -def checks(text, encoding="utf-8"): +def checks(text: str, encoding: str = "utf-8") -> bool: """ Check if a text (binary or unicode) has a frontmatter, return True if exists, False otherwise. @@ -127,7 +138,12 @@ def checks(text, encoding="utf-8"): return detect_format(text, handlers) != None -def load(fd, encoding="utf-8", handler=None, **defaults): +def load( + fd: str | io.IOBase, + encoding: str = "utf-8", + handler: BaseHandler | None = None, + **defaults: object, +) -> Post: """ Load and parse a file-like object or filename, return a :py:class:`post `. @@ -150,7 +166,12 @@ def load(fd, encoding="utf-8", handler=None, **defaults): return loads(text, encoding, handler, **defaults) -def loads(text, encoding="utf-8", handler=None, **defaults): +def loads( + text: str, + encoding: str = "utf-8", + handler: BaseHandler | None = None, + **defaults: object, +) -> Post: """ Parse text (binary or unicode) and return a :py:class:`post `. @@ -166,7 +187,13 @@ def loads(text, encoding="utf-8", handler=None, **defaults): return Post(content, handler, **metadata) -def dump(post, fd, encoding="utf-8", handler=None, **kwargs): +def dump( + post: Post, + fd: str | io.IOBase, + encoding: str = "utf-8", + handler: BaseHandler | None = None, + **kwargs: object, +) -> None: """ Serialize :py:class:`post ` to a string and write to a file-like object. Text will be encoded on the way out (utf-8 by default). @@ -213,7 +240,7 @@ def dump(post, fd, encoding="utf-8", handler=None, **kwargs): f.write(content) -def dumps(post, handler=None, **kwargs): +def dumps(post: Post, handler: BaseHandler | None = None, **kwargs: object) -> str: """ Serialize a :py:class:`post ` to a string and return text. This always returns unicode text, which can then be encoded. @@ -265,46 +292,48 @@ class Post(object): For convenience, metadata values are available as proxied item lookups. """ - def __init__(self, content, handler=None, **metadata): + def __init__( + self, content: str, handler: BaseHandler | None = None, **metadata: object + ) -> None: self.content = str(content) self.metadata = metadata self.handler = handler - def __getitem__(self, name): + def __getitem__(self, name: str) -> object: "Get metadata key" return self.metadata[name] - def __contains__(self, item): + def __contains__(self, item: object) -> bool: "Check metadata contains key" return item in self.metadata - def __setitem__(self, name, value): + def __setitem__(self, name: str, value: object) -> None: "Set a metadata key" self.metadata[name] = value - def __delitem__(self, name): + def __delitem__(self, name: str) -> None: "Delete a metadata key" del self.metadata[name] - def __bytes__(self): + def __bytes__(self) -> bytes: return self.content.encode("utf-8") - def __str__(self): + def __str__(self) -> str: return self.content - def get(self, key, default=None): + def get(self, key: str, default: object = None) -> object: "Get a key, fallback to default" return self.metadata.get(key, default) - def keys(self): + def keys(self) -> Iterable[str]: "Return metadata keys" return self.metadata.keys() - def values(self): + def values(self) -> Iterable[object]: "Return metadata values" return self.metadata.values() - def to_dict(self): + def to_dict(self) -> dict[str, object]: "Post as a dict, for serializing" d = self.metadata.copy() d["content"] = self.content diff --git a/frontmatter/conftest.py b/frontmatter/conftest.py deleted file mode 100644 index e804a6e..0000000 --- a/frontmatter/conftest.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - - -@pytest.fixture(autouse=True) -def add_globals(doctest_namespace): - import frontmatter - - doctest_namespace["frontmatter"] = frontmatter diff --git a/frontmatter/default_handlers.py b/frontmatter/default_handlers.py index 41d42e3..3556c20 100644 --- a/frontmatter/default_handlers.py +++ b/frontmatter/default_handlers.py @@ -116,11 +116,19 @@ """ +from __future__ import annotations import json import re import yaml +from types import ModuleType +from typing import TYPE_CHECKING, Any, Type + +SafeDumper: Type[yaml.CDumper] | Type[yaml.SafeDumper] +SafeLoader: Type[yaml.CSafeLoader] | Type[yaml.SafeLoader] +toml: ModuleType | None + try: from yaml import CSafeDumper as SafeDumper from yaml import CSafeLoader as SafeLoader @@ -136,6 +144,10 @@ from .util import u +if TYPE_CHECKING: + from frontmatter import Post + + __all__ = ["BaseHandler", "YAMLHandler", "JSONHandler"] if toml: @@ -159,11 +171,16 @@ class BaseHandler: All default handlers are subclassed from BaseHandler. """ - FM_BOUNDARY = None - START_DELIMITER = None - END_DELIMITER = None + FM_BOUNDARY: re.Pattern[str] | None = None + START_DELIMITER: str | None = None + END_DELIMITER: str | None = None - def __init__(self, fm_boundary=None, start_delimiter=None, end_delimiter=None): + def __init__( + self, + fm_boundary: re.Pattern[str] | None = None, + start_delimiter: str | None = None, + end_delimiter: str | None = None, + ): self.FM_BOUNDARY = fm_boundary or self.FM_BOUNDARY self.START_DELIMITER = start_delimiter or self.START_DELIMITER self.END_DELIMITER = end_delimiter or self.END_DELIMITER @@ -176,7 +193,7 @@ def __init__(self, fm_boundary=None, start_delimiter=None, end_delimiter=None): ) ) - def detect(self, text): + def detect(self, text: str) -> bool: """ Decide whether this handler can parse the given ``text``, and return True or False. @@ -184,30 +201,32 @@ def detect(self, text): Note that this is *not* called when passing a handler instance to :py:func:`frontmatter.load ` or :py:func:`loads `. """ + assert self.FM_BOUNDARY is not None if self.FM_BOUNDARY.match(text): return True return False - def split(self, text): + def split(self, text: str) -> tuple[str, str]: """ Split text into frontmatter and content """ + assert self.FM_BOUNDARY is not None _, fm, content = self.FM_BOUNDARY.split(text, 2) return fm, content - def load(self, fm): + def load(self, fm: str) -> dict[str, Any]: """ Parse frontmatter and return a dict """ raise NotImplementedError - def export(self, metadata, **kwargs): + def export(self, metadata: dict[str, object], **kwargs: object) -> str: """ Turn metadata back into text """ raise NotImplementedError - def format(self, post, **kwargs): + def format(self, post: Post, **kwargs: object) -> str: """ Turn a post into a string, used in ``frontmatter.dumps`` """ @@ -233,14 +252,14 @@ class YAMLHandler(BaseHandler): FM_BOUNDARY = re.compile(r"^-{3,}\s*$", re.MULTILINE) START_DELIMITER = END_DELIMITER = "---" - def load(self, fm, **kwargs): + def load(self, fm: str, **kwargs: object) -> Any: """ Parse YAML front matter. This uses yaml.SafeLoader by default. """ kwargs.setdefault("Loader", SafeLoader) - return yaml.load(fm, **kwargs) + return yaml.load(fm, **kwargs) # type: ignore[arg-type] - def export(self, metadata, **kwargs): + def export(self, metadata: dict[str, object], **kwargs: object) -> str: """ Export metadata as YAML. This uses yaml.SafeDumper by default. """ @@ -248,8 +267,8 @@ def export(self, metadata, **kwargs): kwargs.setdefault("default_flow_style", False) kwargs.setdefault("allow_unicode", True) - metadata = yaml.dump(metadata, **kwargs).strip() - return u(metadata) # ensure unicode + metadata_str = yaml.dump(metadata, **kwargs).strip() # type: ignore[call-overload] + return u(metadata_str) # ensure unicode class JSONHandler(BaseHandler): @@ -263,40 +282,42 @@ class JSONHandler(BaseHandler): START_DELIMITER = "" END_DELIMITER = "" - def split(self, text): + def split(self, text: str) -> tuple[str, str]: _, fm, content = self.FM_BOUNDARY.split(text, 2) return "{" + fm + "}", content - def load(self, fm, **kwargs): - return json.loads(fm, **kwargs) + def load(self, fm: str, **kwargs: object) -> Any: + return json.loads(fm, **kwargs) # type: ignore[arg-type] - def export(self, metadata, **kwargs): + def export(self, metadata: dict[str, object], **kwargs: object) -> str: "Turn metadata into JSON" kwargs.setdefault("indent", 4) - metadata = json.dumps(metadata, **kwargs) - return u(metadata) - + metadata_str = json.dumps(metadata, **kwargs) # type: ignore[arg-type] + return u(metadata_str) -if toml: - class TOMLHandler(BaseHandler): - """ - Load and export TOML metadata. +class _TOMLHandler(BaseHandler): + """ + Load and export TOML metadata. - By default, split based on ``+++``. - """ + By default, split based on ``+++``. + """ - FM_BOUNDARY = re.compile(r"^\+{3,}\s*$", re.MULTILINE) - START_DELIMITER = END_DELIMITER = "+++" + FM_BOUNDARY = re.compile(r"^\+{3,}\s*$", re.MULTILINE) + START_DELIMITER = END_DELIMITER = "+++" - def load(self, fm, **kwargs): - return toml.loads(fm, **kwargs) + def load(self, fm: str, **kwargs: object) -> Any: + assert toml is not None + return toml.loads(fm, **kwargs) - def export(self, metadata, **kwargs): - "Turn metadata into TOML" - metadata = toml.dumps(metadata) - return u(metadata) + def export(self, metadata: dict[str, object], **kwargs: object) -> str: + "Turn metadata into TOML" + assert toml is not None + metadata_str = toml.dumps(metadata) + return u(metadata_str) +if toml: + TOMLHandler: Type[_TOMLHandler] | None = _TOMLHandler else: TOMLHandler = None diff --git a/frontmatter/util.py b/frontmatter/util.py index 602f8fd..bf38eac 100644 --- a/frontmatter/util.py +++ b/frontmatter/util.py @@ -2,14 +2,17 @@ """ Utilities for handling unicode and other repetitive bits """ +from typing import AnyStr -def u(text, encoding="utf-8"): +def u(text: AnyStr, encoding: str = "utf-8") -> str: "Return unicode text, no matter what" if isinstance(text, bytes): - text = text.decode(encoding) + text_str: str = text.decode(encoding) + else: + text_str = text # it's already unicode - text = text.replace("\r\n", "\n") - return text + text_str = text_str.replace("\r\n", "\n") + return text_str diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..965ccc3 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,3 @@ +[mypy] +strict = True +exclude = setup.py|venv*|build|docs|examples|tests diff --git a/setup.py b/setup.py index fa18562..0d84ff9 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,10 @@ packages=["frontmatter"], include_package_data=True, install_requires=["PyYAML"], - extras_require={"test": ["pytest", "toml", "pyaml"], "docs": ["sphinx"]}, + extras_require={ + "test": ["pytest", "toml", "pyaml", "mypy", "types-PyYAML", "types-toml"], + "docs": ["sphinx"], + }, tests_require=["python-frontmatter[test]"], license="MIT", zip_safe=False, From 48322873e1362a1115264da6689d61bdb8fd75fa Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 15 Nov 2023 00:36:02 +0530 Subject: [PATCH 2/8] undo conftest change --- frontmatter/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 frontmatter/conftest.py diff --git a/frontmatter/conftest.py b/frontmatter/conftest.py new file mode 100644 index 0000000..e804a6e --- /dev/null +++ b/frontmatter/conftest.py @@ -0,0 +1,8 @@ +import pytest + + +@pytest.fixture(autouse=True) +def add_globals(doctest_namespace): + import frontmatter + + doctest_namespace["frontmatter"] = frontmatter From cef1b91551814c003e1fc392ad368cd756f409d8 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 15 Nov 2023 00:38:49 +0530 Subject: [PATCH 3/8] add types to conftest --- frontmatter/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontmatter/conftest.py b/frontmatter/conftest.py index e804a6e..93b92cc 100644 --- a/frontmatter/conftest.py +++ b/frontmatter/conftest.py @@ -2,7 +2,7 @@ @pytest.fixture(autouse=True) -def add_globals(doctest_namespace): +def add_globals(doctest_namespace: dict[str, object]) -> None: import frontmatter doctest_namespace["frontmatter"] = frontmatter From e4fe58441406d29bea5428dc6c7c16d274d8e98a Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 15 Nov 2023 00:38:58 +0530 Subject: [PATCH 4/8] remove unused imports --- frontmatter/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/frontmatter/__init__.py b/frontmatter/__init__.py index 2ec44e8..12a44c7 100644 --- a/frontmatter/__init__.py +++ b/frontmatter/__init__.py @@ -6,8 +6,7 @@ import codecs import io -import os -from typing import TYPE_CHECKING, Iterable, cast +from typing import TYPE_CHECKING, Iterable from .util import u from .default_handlers import YAMLHandler, JSONHandler, TOMLHandler From 2562a2c61231676a77bf68c8678fb01065813543 Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 15 Nov 2023 00:40:31 +0530 Subject: [PATCH 5/8] fix failing doctest --- frontmatter/default_handlers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/frontmatter/default_handlers.py b/frontmatter/default_handlers.py index 3556c20..5f32396 100644 --- a/frontmatter/default_handlers.py +++ b/frontmatter/default_handlers.py @@ -317,6 +317,8 @@ def export(self, metadata: dict[str, object], **kwargs: object) -> str: return u(metadata_str) +_TOMLHandler.__name__ = "TOMLHandler" + if toml: TOMLHandler: Type[_TOMLHandler] | None = _TOMLHandler else: From c3fd6b0892c242ce449501e3e3541dfce09d9d7d Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 15 Nov 2023 00:41:38 +0530 Subject: [PATCH 6/8] add future annotations import to conftest --- frontmatter/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/frontmatter/conftest.py b/frontmatter/conftest.py index 93b92cc..c0fa5e7 100644 --- a/frontmatter/conftest.py +++ b/frontmatter/conftest.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import pytest From ebe2f5fbcd0cf45d8489734c7bbfafdd578b913e Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Wed, 15 Nov 2023 00:50:10 +0530 Subject: [PATCH 7/8] undo unnecessary change --- frontmatter/default_handlers.py | 37 +++++++++++++++------------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/frontmatter/default_handlers.py b/frontmatter/default_handlers.py index 5f32396..f16eb15 100644 --- a/frontmatter/default_handlers.py +++ b/frontmatter/default_handlers.py @@ -296,30 +296,27 @@ def export(self, metadata: dict[str, object], **kwargs: object) -> str: return u(metadata_str) -class _TOMLHandler(BaseHandler): - """ - Load and export TOML metadata. - - By default, split based on ``+++``. - """ +if toml: - FM_BOUNDARY = re.compile(r"^\+{3,}\s*$", re.MULTILINE) - START_DELIMITER = END_DELIMITER = "+++" + class TOMLHandler(BaseHandler): + """ + Load and export TOML metadata. - def load(self, fm: str, **kwargs: object) -> Any: - assert toml is not None - return toml.loads(fm, **kwargs) + By default, split based on ``+++``. + """ - def export(self, metadata: dict[str, object], **kwargs: object) -> str: - "Turn metadata into TOML" - assert toml is not None - metadata_str = toml.dumps(metadata) - return u(metadata_str) + FM_BOUNDARY = re.compile(r"^\+{3,}\s*$", re.MULTILINE) + START_DELIMITER = END_DELIMITER = "+++" + def load(self, fm: str, **kwargs: object) -> Any: + assert toml is not None + return toml.loads(fm, **kwargs) -_TOMLHandler.__name__ = "TOMLHandler" + def export(self, metadata: dict[str, object], **kwargs: object) -> str: + "Turn metadata into TOML" + assert toml is not None + metadata_str = toml.dumps(metadata) + return u(metadata_str) -if toml: - TOMLHandler: Type[_TOMLHandler] | None = _TOMLHandler else: - TOMLHandler = None + TOMLHandler: Type[TOMLHandler] | None = None # type: ignore[no-redef] From 5aa3109822cf3c87f837d59e0ebba85e27ae5e6f Mon Sep 17 00:00:00 2001 From: Tushar Sadhwani Date: Fri, 12 Jan 2024 15:59:45 +0530 Subject: [PATCH 8/8] add py.typed file --- frontmatter/py.typed | 1 + 1 file changed, 1 insertion(+) create mode 100644 frontmatter/py.typed diff --git a/frontmatter/py.typed b/frontmatter/py.typed new file mode 100644 index 0000000..d3245e7 --- /dev/null +++ b/frontmatter/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. This package uses inline types.