From 3c0a4fcd7ac991e33f848e77a03d8e5896c8206e Mon Sep 17 00:00:00 2001 From: Nolan Woods Date: Fri, 1 Jul 2022 14:14:41 -0700 Subject: [PATCH 1/2] Allow reusing anchors in YAML YAML Spec permits anchor name reuse, but ruamel.yaml by default errors for them. https://stackoverflow.com/questions/39013993/parse-a-yaml-with-duplicate-anchors-in-python --- src/check_jsonschema/loaders/instance/yaml.py | 6 +++++- src/check_jsonschema/loaders/schema/readers.py | 6 +++++- tests/unit/test_loaders.py | 8 ++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/check_jsonschema/loaders/instance/yaml.py b/src/check_jsonschema/loaders/instance/yaml.py index cbbbaf793..04f5b0a88 100644 --- a/src/check_jsonschema/loaders/instance/yaml.py +++ b/src/check_jsonschema/loaders/instance/yaml.py @@ -1,8 +1,12 @@ import typing as t +import warnings import ruamel.yaml +from ruamel.yaml.error import ReusedAnchorWarning -_yaml = ruamel.yaml.YAML(typ="safe") +warnings.simplefilter("ignore", ReusedAnchorWarning) + +_yaml = ruamel.yaml.YAML(typ="safe", pure=True) # ruamel.yaml parses timestamp values into datetime.datetime values which differs from # JSON which parses timestamps as strings. Turn off this feature. 
diff --git a/src/check_jsonschema/loaders/schema/readers.py b/src/check_jsonschema/loaders/schema/readers.py index 93883a512..f814eb32c 100644 --- a/src/check_jsonschema/loaders/schema/readers.py +++ b/src/check_jsonschema/loaders/schema/readers.py @@ -2,16 +2,20 @@ import json import typing as t +import warnings import identify import ruamel.yaml +from ruamel.yaml.error import ReusedAnchorWarning from check_jsonschema import utils from ...cachedownloader import CacheDownloader from ..errors import SchemaParseError -yaml = ruamel.yaml.YAML(typ="safe") +warnings.simplefilter("ignore", ReusedAnchorWarning) + +yaml = ruamel.yaml.YAML(typ="safe", pure=True) def _json_load_schema(schema_location: str, fp: t.IO) -> dict: diff --git a/tests/unit/test_loaders.py b/tests/unit/test_loaders.py index 1e32a810f..22e500cc3 100644 --- a/tests/unit/test_loaders.py +++ b/tests/unit/test_loaders.py @@ -55,11 +55,11 @@ def test_schemaloader_local_yaml_data(tmp_path, filename): a: type: object properties: - b: + b: &anchor type: array items: type: integer - c: + c: &anchor type: string """ ) @@ -130,10 +130,10 @@ def test_instanceloader_yaml_data(tmp_path, filename, default_ft): f.write_text( """\ a: - b: + b: &anchor - 1 - 2 - c: d + c: &anchor d """ ) loader = InstanceLoader([str(f)], default_filetype=default_ft) From 69a8ecadbaa9bcb3640f890633a7448fa89c3c80 Mon Sep 17 00:00:00 2001 From: Nolan Woods Date: Fri, 1 Jul 2022 16:20:42 -0700 Subject: [PATCH 2/2] Refactor anchor tests --- tests/unit/test_loaders.py | 66 +++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_loaders.py b/tests/unit/test_loaders.py index 22e500cc3..8644c8aee 100644 --- a/tests/unit/test_loaders.py +++ b/tests/unit/test_loaders.py @@ -2,6 +2,7 @@ import pathlib import pytest +import ruamel.yaml.composer from check_jsonschema.loaders import BadFileTypeError, InstanceLoader, SchemaLoader from check_jsonschema.loaders.instance.json5 import ENABLED as 
JSON5_ENABLED @@ -55,11 +56,11 @@ def test_schemaloader_local_yaml_data(tmp_path, filename): a: type: object properties: - b: &anchor + b: type: array items: type: integer - c: &anchor + c: type: string """ ) @@ -80,6 +81,38 @@ def test_schemaloader_local_yaml_data(tmp_path, filename): } +@pytest.mark.parametrize( + "filename", + [ + "schema.yaml", + ], +) +@pytest.mark.filterwarnings("ignore:ReusedAnchorWarning") +def test_schemaloader_local_yaml_dup_anchor(tmp_path, filename): + f = tmp_path / filename + f.write_text( + """ +--- +"$schema": https://json-schema.org/draft/2020-12/schema +type: object +properties: + a: + type: object + properties: + b: &anchor + type: array + items: + type: integer + c: &anchor + type: string +""" + ) + try: + SchemaLoader(str(f)) + except ruamel.yaml.composer.ComposerError as e: + raise AssertionError(f"YAML loader does not support duplicate anchors {e}") + + @pytest.mark.parametrize( "schemafile", [ @@ -130,10 +163,10 @@ def test_instanceloader_yaml_data(tmp_path, filename, default_ft): f.write_text( """\ a: - b: &anchor + b: - 1 - 2 - c: &anchor d + c: d """ ) loader = InstanceLoader([str(f)], default_filetype=default_ft) @@ -141,6 +174,31 @@ def test_instanceloader_yaml_data(tmp_path, filename, default_ft): assert data == [(str(f), {"a": {"b": [1, 2], "c": "d"}})] +@pytest.mark.parametrize( + "filename", + [ + "foo.yaml", + ], +) +@pytest.mark.filterwarnings("ignore:ReusedAnchorWarning") +def test_instanceloader_yaml_dup_anchor(tmp_path, filename): + f = tmp_path / filename + f.write_text( + """\ +a: + b: &anchor + - 1 + - 2 + c: &anchor d +""" + ) + loader = InstanceLoader([str(f)]) + try: + list(loader.iter_files()) + except ruamel.yaml.composer.ComposerError as e: + raise AssertionError(f"YAML loader does not support duplicate anchors {e}") + + def test_instanceloader_unknown_type(tmp_path): f = tmp_path / "foo" # no extension here f.write_text("{}") # json data (could be detected as either)