diff --git a/poetry.lock b/poetry.lock index 47ce16c1..d09e8e97 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -700,6 +700,19 @@ files = [ {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] +[[package]] +name = "isodate" +version = "0.7.2" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = ">=3.7" +groups = ["main"] +markers = "python_version == \"3.10\"" +files = [ + {file = "isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15"}, + {file = "isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6"}, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -914,7 +927,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.9" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1139,6 +1152,24 @@ files = [ dev = ["abi3audit", "black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] test = ["pytest", "pytest-xdist", "setuptools"] +[[package]] +name = "pyaml" +version = "25.7.0" +description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyaml-25.7.0-py3-none-any.whl", hash = "sha256:ce5d7867cc2b455efdb9b0448324ff7b9f74d99f64650f12ca570102db6b985f"}, + {file = "pyaml-25.7.0.tar.gz", hash = "sha256:e113a64ec16881bf2b092e2beb84b7dcf1bd98096ad17f5f14e8fb782a75d99b"}, +] + +[package.dependencies] +PyYAML = "*" + +[package.extras] +anchors = ["unidecode"] + [[package]] name = "pycodestyle" version = "2.9.1" @@ -1543,6 +1574,21 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] +[[package]] +name = "pytest-httpserver" +version = "1.1.3" +description = "pytest-httpserver is a httpserver for pytest" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pytest_httpserver-1.1.3-py3-none-any.whl", hash = "sha256:5f84757810233e19e2bb5287f3826a71c97a3740abe3a363af9155c0f82fdbb9"}, + {file = "pytest_httpserver-1.1.3.tar.gz", hash = "sha256:af819d6b533f84b4680b9416a5b3f67f1df3701f1da54924afd4d6e4ba5917ec"}, +] + +[package.dependencies] +Werkzeug = ">=2.0.0" + [[package]] name = 
"python-dateutil" version = "2.9.0.post0" @@ -1594,7 +1640,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" -groups = ["docs"] +groups = ["main", "docs"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -1651,6 +1697,29 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "rdflib" +version = "7.1.4" +description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." +optional = false +python-versions = "<4.0.0,>=3.8.1" +groups = ["main"] +files = [ + {file = "rdflib-7.1.4-py3-none-any.whl", hash = "sha256:72f4adb1990fa5241abd22ddaf36d7cafa5d91d9ff2ba13f3086d339b213d997"}, + {file = "rdflib-7.1.4.tar.gz", hash = "sha256:fed46e24f26a788e2ab8e445f7077f00edcf95abb73bcef4b86cefa8b62dd174"}, +] + +[package.dependencies] +isodate = {version = ">=0.7.2,<1.0.0", markers = "python_version < \"3.11\""} +pyparsing = ">=2.1.0,<4" + +[package.extras] +berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"] +html = ["html5rdf (>=1.2,<2)"] +lxml = ["lxml (>=4.3,<6.0)"] +networkx = ["networkx (>=2,<4)"] +orjson = ["orjson (>=3.9.14,<4)"] + [[package]] name = "requests" version = "2.32.4" @@ -1806,6 +1875,21 @@ files = [ {file = "ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f"}, ] +[[package]] +name = "schemaorg" +version = "0.1.1" +description = "Python functions for applied use of schema.org" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "schemaorg-0.1.1.tar.gz", hash = 
"sha256:567f1735df666221c893d2c206dd70f9cddcc983c8cdc39f3a7b7726884d2c51"}, +] + +[package.dependencies] +lxml = ">=4.1.1" +pyaml = ">=17.12.1" + [[package]] name = "setuptools" version = "80.9.0" @@ -2370,6 +2454,24 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "werkzeug" +version = "3.1.3" +description = "The comprehensive WSGI web application library." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e"}, + {file = "werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746"}, +] + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + [[package]] name = "wheel" version = "0.45.1" @@ -2477,4 +2579,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10, <4.0.0" -content-hash = "58304fd33d6ec1ce3400b43ecffb16b3f48a5621e513c3e8057f9e3e050835e8" +content-hash = "a8cbf610eb2e4405175914d22c8383c796ccb443724d04d2f3e58fcb08205e66" diff --git a/pyproject.toml b/pyproject.toml index a42543b9..d28f60fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,8 @@ dependencies = [ "pydantic-settings>=2.1.0, <3.0.0", "requests-oauthlib>=2.0.0, <3.0.0", "pynacl>=1.5.0, <2.0.0", + "rdflib (>=7.1.4,<8.0.0)", + "schemaorg (>=0.1.1,<0.2.0)", ] requires-python = ">=3.10, <4.0.0" @@ -76,6 +78,7 @@ pytest-cov = "^3.0.0" taskipy = "^1.10.3" flake8 = "^5.0.4" requests-mock = "^1.10.0" +pytest-httpserver = "^1.1.3" # Packages for developers for creating documentation [tool.poetry.group.docs] diff --git a/src/hermes/model/types/ld_container.py b/src/hermes/model/types/ld_container.py index fd84e033..9d8b10c2 100644 --- a/src/hermes/model/types/ld_container.py +++ b/src/hermes/model/types/ld_container.py @@ -47,7 +47,7 @@ def __init__(self, data, *, 
parent=None, key=None, index=None, context=None): else: self.active_ctx = parent.active_ctx else: - self.active_ctx = self.ld_proc.inital_ctx( + self.active_ctx = self.ld_proc.initial_ctx( self.full_context, {"documentLoader": bundled_loader} ) diff --git a/src/hermes/model/types/pyld_util.py b/src/hermes/model/types/pyld_util.py index f652cce8..1d3f9bff 100644 --- a/src/hermes/model/types/pyld_util.py +++ b/src/hermes/model/types/pyld_util.py @@ -106,7 +106,7 @@ def expand_iri(self, active_ctx: t.Any, short_iri: str) -> str: def compact_iri(self, active_ctx: t.Any, long_iri: str) -> str: return self._compact_iri(active_ctx, long_iri, vocab=True) - def inital_ctx(self, local_ctx, options=None): + def initial_ctx(self, local_ctx, options=None): return self.process_context(self._INITIAL_CONTEXT, local_ctx, options or {}) @classmethod diff --git a/test/hermes_test/model/types/__init__.py b/test/hermes_test/model/types/__init__.py index e69de29b..ef943d95 100644 --- a/test/hermes_test/model/types/__init__.py +++ b/test/hermes_test/model/types/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Sophie Kernchen diff --git a/test/hermes_test/model/types/conftest.py b/test/hermes_test/model/types/conftest.py new file mode 100644 index 00000000..8a1c7c2e --- /dev/null +++ b/test/hermes_test/model/types/conftest.py @@ -0,0 +1,88 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +from datetime import datetime + +import pytest + + +class MockDocument: + """ + Helper that provides valid JSON-LD data. + """ + + @classmethod + def vocabulary(cls, base_url: str = "http://spam.eggs/") -> dict: + """ + Retrieve the vocabulary used for the document. + + :param base_url: Optional base URL to use for IRIs in the vocabulary. + :returns: A JSON-LD vocabulary usable as a document context.
+ """ + return { + "spam": {"@id": f"{base_url}spam"}, + "ham": {"@id": f"{base_url}ham", "@type": "@id"}, + "eggs": {"@id": f"{base_url}eggs", "@container": "@list"}, + "use_until": {"@id": f"{base_url}use_until", "@type": "http://schema.org/DateTime"}, + + "Egg": {"@id": f"{base_url}Egg"}, + } + + @classmethod + def compact(cls, base_url: str = "http://spam.eggs/", embed_vocabulary: bool = True) -> dict: + """ + Get compact representation of the example document. + + :param base_url: Optional base URL used to generate the context. + :param embed_vocabulary: Optional switch to indicate whether the vocabulary should be embedded in the context + or only referenced by the base URL. + :returns: The rendered compact document. + """ + return { + "@context": cls.vocabulary(base_url) if embed_vocabulary else base_url, + + "spam": "bacon", + "ham": f"{base_url}identifier", + "eggs": [ + {"@type": "Egg", "use_until": datetime(2024, 4, 20, 16, 20).isoformat()}, + {"@type": "Egg", "use_until": datetime(2026, 12, 31, 23, 59, 59).isoformat()}, + ] + } + + @classmethod + def expanded(cls, base_url: str = "http://spam.eggs/") -> list[dict]: + """ + Get expanded representation of the example document. + + :param base_url: Optional base URL to use for IRIs. + :returns: The rendered expanded document.
+ """ + return [{ + f"{base_url}spam": [{"@value": "bacon"}], + f"{base_url}ham": [{"@id": f"{base_url}identifier"}], + f"{base_url}eggs": [{"@list": [ + { + "@type": [f"{base_url}Egg"], + f"{base_url}use_until": [ + {"@type": "http://schema.org/DateTime", "@value": "2024-04-20T16:20:00"} + ], + }, + { + "@type": [f"{base_url}Egg"], + f"{base_url}use_until": [ + {"@type": "http://schema.org/DateTime", "@value": "2026-12-31T23:59:59"} + ], + } + ]}] + }] + + +@pytest.fixture +def mock_context(): + return MockDocument.vocabulary() + + +@pytest.fixture +def mock_document(): + return MockDocument diff --git a/test/hermes_test/model/types/test_ld_container.py b/test/hermes_test/model/types/test_ld_container.py new file mode 100644 index 00000000..9da5b461 --- /dev/null +++ b/test/hermes_test/model/types/test_ld_container.py @@ -0,0 +1,155 @@ +# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) +# +# SPDX-License-Identifier: Apache-2.0 + +# SPDX-FileContributor: Sophie Kernchen +# SPDX-FileContributor: Michael Meinel + +from datetime import datetime + +import pytest + +from hermes.model.types.ld_container import ld_container + +'''We expect users of this class to provide the right input data types. + +Example of expanded JSON-LD: + [{ + "http://schema.org/name": [{"@value": "bacon"}], + "eggs": [{"@id": "spam"}], + "green": [{"@id": "png"}] + }] +''' + + +class TestLdContainer: + @classmethod + @pytest.fixture(autouse=True) + def setup_class(cls, httpserver, mock_document): + cls.url = httpserver.url_for("/") + httpserver.expect_request("/").respond_with_json({"@context": mock_document.vocabulary(cls.url)}) + + def test_container_basic(self): + cont = ld_container([{"spam": [{"@value": "bacon"}]}]) + + assert cont.key is None + assert cont.context == [] + assert cont._data == [{"spam": [{"@value": "bacon"}]}] + assert cont.path == ["$"] + + def test_container_ld_value(self): + cont = ld_container([{"spam": [{"@value": "bacon"}]}]) + + assert cont.ld_value == [{"spam": 
[{"@value": "bacon"}]}] + + def test_container_add_context(self): + cont = ld_container([{"spam": [{"@value": "bacon"}]}]) + cont.add_context([self.url]) + + assert cont.context == [self.url] + assert cont.full_context == [self.url] + + def test_container_parent(self): + cont_data = [{"spam": [{"@value": "bacon"}]}] + cont_parent = ld_container([{"ham": cont_data}]) + cont = ld_container(cont_data, parent=cont_parent, key="ham") + assert cont.full_context == [] + + cont_parent.add_context([self.url]) + + assert cont.parent == cont_parent + assert cont.full_context == [self.url] + + def test_container_full_context_and_path(self, httpserver): + httpserver.expect_request("/url2").respond_with_json({"spam": "eggs"}) + httpserver.expect_request("/url3").respond_with_json({"ham": "bacon"}) + httpserver.expect_request("/url4").respond_with_json({"@context": {"id": "@id"}}) + + cont_data = [{"spam": [{"@value": "bacon"}]}] + cont_parent_data = [cont_data] + cont_grand_parent = ld_container([{"ham": cont_parent_data}], context=[self.url]) + cont_parent = ld_container(cont_parent_data, context=[httpserver.url_for("/url2"), + httpserver.url_for("/url4")], + parent=cont_grand_parent, key="ham") + cont = ld_container(cont_data, context=[httpserver.url_for("/url3")], parent=cont_parent, + index=0) + assert cont_parent.full_context == [self.url, httpserver.url_for("/url2"), httpserver.url_for("/url4")] + assert cont.full_context == [self.url, httpserver.url_for("/url2"), httpserver.url_for("/url4"), + httpserver.url_for("/url3")] + assert cont_grand_parent.path == ["$"] + assert cont_parent.path == ["$", "ham"] + assert cont.path == ["$", "ham", 0] + + def test_container_str_and_repr(self): + cont = ld_container([{"spam": [{"@value": "bacon"}]}]) + assert repr(cont) == "ld_container({'spam': [{'@value': 'bacon'}]})" + with pytest.raises(NotImplementedError): + str(cont) + + def test_to_python_id(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert 
cont._to_python("@id", "http://spam.eggs/ham") == "http://spam.eggs/ham" + + def test_to_python_id_with_prefix(self, mock_context): + cont = ld_container([{}], context=[mock_context, {"prefix": self.url}]) + assert cont._to_python("@id", f"{self.url}identifier") == "prefix:identifier" + + def test_to_python_type(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_python("@type", ["@id"]) == '@id' + assert cont._to_python("@type", ["@id", "http://spam.eggs/Egg"]) == ["@id", "Egg"] + + def test_to_python_id_value(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_python("http://spam.eggs/ham", + [{"@id": "http://spam.eggs/spam"}]) == "http://spam.eggs/spam" + assert cont._to_python("http://spam.eggs/ham", + [{"@id": "http://spam.eggs/identifier"}]) == "http://spam.eggs/identifier" + + def test_to_python_basic_value(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_python("http://soam.eggs/spam", [{"@value": "bacon"}]) == 'bacon' + assert cont._to_python("http://spam.eggs/spam", [{"@value": True}]) is True + assert cont._to_python("http://spam.eggs/spam", [{"@value": 123}]) == 123 + + def test_to_python_datetime_value(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_python("http://spam.eggs/eggs", [{ + "@value": "2022-02-22T00:00:00", "@type": "https://schema.org/DateTime" + }]) == "2022-02-22T00:00:00" + + def test_to_expanded_id(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_expanded_json("@id", f"{self.url}identifier") == f"{self.url}identifier" + + # Regression test: "ham" is vocabulary and must not be expanded. 
+ assert cont._to_expanded_json("@id", "ham") == "ham" + + def test_to_expanded_id_with_prefix(self, mock_context): + cont = ld_container([{}], context=[mock_context, {"prefix": self.url}]) + assert cont._to_expanded_json("@id", "prefix:identifier") == f"{self.url}identifier" + + # Regression test: "ham" should still not be expanded, but "prefix:ham" should be. + assert cont._to_expanded_json("@id", "ham") == "ham" + assert cont._to_expanded_json("@id", "prefix:ham") == f"{self.url}ham" + + def test_to_expanded_type(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_expanded_json("@type", "Egg") == ["http://spam.eggs/Egg"] + assert cont._to_expanded_json("@type", ["Egg", "@id"]) == ["http://spam.eggs/Egg", "@id"] + + def test_to_expanded_id_value(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_expanded_json("ham", "spam") == [{"@id": "spam"}] + + def test_to_expanded_basic_value(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_expanded_json("spam", "bacon") == [{"@value": "bacon"}] + assert cont._to_expanded_json("spam", 123) == [{"@value": 123}] + assert cont._to_expanded_json("spam", True) == [{"@value": True}] + + def test_to_expanded_datetime_value(self, mock_context): + cont = ld_container([{}], context=[mock_context]) + assert cont._to_expanded_json("eggs", datetime(2022, 2, 22)) == [ + {"@value": "2022-02-22T00:00:00", "@type": "http://schema.org/DateTime"} + ] diff --git a/test/hermes_test/model/types/test_pyld_util.py b/test/hermes_test/model/types/test_pyld_util.py new file mode 100644 index 00000000..fa4e539d --- /dev/null +++ b/test/hermes_test/model/types/test_pyld_util.py @@ -0,0 +1,18 @@ +import pytest + +from hermes.model.types import pyld_util + + +@pytest.fixture +def ld_proc(): + return pyld_util.JsonLdProcessor() + + +def test_mock_document_compact(ld_proc, mock_document): + compact_document = 
ld_proc.compact(mock_document.expanded(), [mock_document.vocabulary()], {}) + assert compact_document == mock_document.compact() + + +def test_mock_document_expanded(ld_proc, mock_document): + expanded_document = ld_proc.expand(mock_document.compact(), {}) + assert expanded_document == mock_document.expanded()