From 7023935ffc0bddab3553dfe5e24f365c97171db7 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Mon, 2 May 2022 18:53:51 -0600 Subject: [PATCH 1/6] schema: version schema-cloud-config-v1.json Provide top-level versions.schema.cloud-config.json which will be consumed by https://github.com/SchemaStore/schemastore/ and provided for json validator tooling. Rename cloud-init-schema.json to schema-cloud-config-v1.json to allow space for providing static scoped schema definitions for vendor-data, meta-data and network-config in the future. --- ...schema.json => schema-cloud-config-v1.json} | 0 cloudinit/config/schema.py | 14 +++++++++++++- .../config/versions.schema.cloud-config.json | 18 ++++++++++++++++++ tests/unittests/config/test_schema.py | 10 +++------- 4 files changed, 34 insertions(+), 8 deletions(-) rename cloudinit/config/{cloud-init-schema.json => schema-cloud-config-v1.json} (100%) create mode 100644 cloudinit/config/versions.schema.cloud-config.json diff --git a/cloudinit/config/cloud-init-schema.json b/cloudinit/config/schema-cloud-config-v1.json similarity index 100% rename from cloudinit/config/cloud-init-schema.json rename to cloudinit/config/schema-cloud-config-v1.json diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index 1cd0dfa5615..79a7dfc620b 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -21,6 +21,9 @@ error = partial(error, sys_exit=True) LOG = logging.getLogger(__name__) +# Bump this file when introducing incompatible schema changes. +# Also add new version definition to versions.schema.json. +USERDATA_SCHEMA_FILE = "schema-cloud-config-v1.json" _YAML_MAP = {True: "true", False: "false", None: "null"} CLOUD_CONFIG_HEADER = b"#cloud-config" SCHEMA_DOC_TMPL = """ @@ -661,8 +664,17 @@ def load_doc(requested_modules: list) -> str: def get_schema() -> dict: """Return jsonschema coalesced from all cc_* cloud-config modules.""" + # Note versions.schema.json is publicly consumed by schemastore.org. 
+ # If we change the location of versions.schema.json in github, we need + # to provide an updated PR to + # https://github.com/SchemaStore/schemastore. + + # When bumping schema version due to incompatible changes: + # 1. Add a new schema-cloud-config-v#.json + # 2. change the USERDATA_SCHEMA_FILE to cloud-init-schema-v#.json + # 3. Add the new version definition to versions.schema.cloud-config.json schema_file = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "cloud-init-schema.json" + os.path.dirname(os.path.abspath(__file__)), USERDATA_SCHEMA_FILE ) full_schema = None try: diff --git a/cloudinit/config/versions.schema.cloud-config.json b/cloudinit/config/versions.schema.cloud-config.json new file mode 100644 index 00000000000..2a02d444190 --- /dev/null +++ b/cloudinit/config/versions.schema.cloud-config.json @@ -0,0 +1,18 @@ +{ + "$id": "https://github.com/canonical/cloud-init/tree/main/cloudinit/config/schema/versions.schema.cloud-config.json", + "$schema": "http://json-schema.org/draft-04/schema#", + "oneOf": [ + { + "allOf": [ + { + "properties": { + "version": { + "enum": ["22.2", "v1"] + } + }, + "$ref": "https://raw.githubusercontent.com/canonical/cloud-init/main/cloudinit/config/schema-cloud-config-v1.json" + } + ] + } + ] +} diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py index 43f5a68d9ce..032dcfa4cdb 100644 --- a/tests/unittests/config/test_schema.py +++ b/tests/unittests/config/test_schema.py @@ -179,15 +179,11 @@ class TestLoadDoc: docs = get_module_variable("__doc__") - # TODO( Drop legacy test when all sub-schemas in cloud-init-schema.json ) @pytest.mark.parametrize( "module_name", - ( - "cc_apt_pipelining", # new style composite schema file - "cc_install_hotplug", # legacy sub-schema defined in module - ), + ("cc_apt_pipelining",), # new style composite schema file ) - def test_report_docs_for_legacy_and_consolidated_schema(self, module_name): + def test_report_docs_consolidated_schema(self, 
module_name): doc = load_doc([module_name]) assert doc, "Unexpected empty docs for {}".format(module_name) assert self.docs[module_name] == doc @@ -329,7 +325,7 @@ def test_validateconfig_schema_of_example(self, schema_id, example): """ schema = get_schema() config_load = load(example) - # cloud-init-schema is permissive of additionalProperties at the + # cloud-init-schema-v1 is permissive of additionalProperties at the # top-level. # To validate specific schemas against known documented examples # we need to only define the specific module schema and supply From eeab923138bf552e9456ff06ca933a5fe3ddfa1b Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Fri, 13 May 2022 11:30:39 -0600 Subject: [PATCH 2/6] schema: move to schemas subdir, correct version schema id URL - Move static schema files into cloudinit/config/schemas. - versions.schema.cloud-config.json to use correct raw github URL as $id - add get_schema_dir for use in testing --- cloudinit/config/schema.py | 23 +++-------- .../{ => schemas}/schema-cloud-config-v1.json | 0 .../versions.schema.cloud-config.json | 4 +- tests/unittests/config/test_schema.py | 38 ++++++++++++++++++- 4 files changed, 45 insertions(+), 20 deletions(-) rename cloudinit/config/{ => schemas}/schema-cloud-config-v1.json (100%) rename cloudinit/config/{ => schemas}/versions.schema.cloud-config.json (50%) diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index 79a7dfc620b..7a6ecf08c74 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -21,6 +21,7 @@ error = partial(error, sys_exit=True) LOG = logging.getLogger(__name__) +VERSIONED_USERDATA_SCHEMA_FILE = "versions.schema.cloud-config.json" # Bump this file when introducing incompatible schema changes. # Also add new version definition to versions.schema.json. 
USERDATA_SCHEMA_FILE = "schema-cloud-config-v1.json" @@ -662,6 +663,10 @@ def load_doc(requested_modules: list) -> str: return docs +def get_schema_dir() -> str: + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "schemas") + + def get_schema() -> dict: """Return jsonschema coalesced from all cc_* cloud-config modules.""" # Note versions.schema.json is publicly consumed by schemastore.org. @@ -673,9 +678,7 @@ def get_schema() -> dict: # 1. Add a new schema-cloud-config-v#.json # 2. change the USERDATA_SCHEMA_FILE to cloud-init-schema-v#.json # 3. Add the new version definition to versions.schema.cloud-config.json - schema_file = os.path.join( - os.path.dirname(os.path.abspath(__file__)), USERDATA_SCHEMA_FILE - ) + schema_file = os.path.join(get_schema_dir(), USERDATA_SCHEMA_FILE) full_schema = None try: full_schema = json.loads(load_file(schema_file)) @@ -692,20 +695,6 @@ def get_schema() -> dict: "$schema": "http://json-schema.org/draft-04/schema#", "allOf": [], } - - # TODO( Drop the get_modules loop when all legacy cc_* schema migrates ) - # Supplement base_schema with any legacy modules which still contain a - # "schema" attribute. Legacy cc_* modules will be migrated to use the - # store module schema in the composite cloud-init-schema-.json - # and will drop "schema" at that point. 
- for (_, mod_name) in get_modules().items(): - # All cc_* modules need a "meta" attribute to represent schema defs - (mod_locs, _) = importer.find_module( - mod_name, ["cloudinit.config"], ["schema"] - ) - if mod_locs: - mod = importer.import_module(mod_locs[0]) - full_schema["allOf"].append(mod.schema) return full_schema diff --git a/cloudinit/config/schema-cloud-config-v1.json b/cloudinit/config/schemas/schema-cloud-config-v1.json similarity index 100% rename from cloudinit/config/schema-cloud-config-v1.json rename to cloudinit/config/schemas/schema-cloud-config-v1.json diff --git a/cloudinit/config/versions.schema.cloud-config.json b/cloudinit/config/schemas/versions.schema.cloud-config.json similarity index 50% rename from cloudinit/config/versions.schema.cloud-config.json rename to cloudinit/config/schemas/versions.schema.cloud-config.json index 2a02d444190..7f01dfe992c 100644 --- a/cloudinit/config/versions.schema.cloud-config.json +++ b/cloudinit/config/schemas/versions.schema.cloud-config.json @@ -1,6 +1,6 @@ { - "$id": "https://github.com/canonical/cloud-init/tree/main/cloudinit/config/schema/versions.schema.cloud-config.json", "$schema": "http://json-schema.org/draft-04/schema#", + "$id": "https://raw.githubusercontent.com/canonical/cloud-init/main/cloudinit/config/schemas/versions.schema.cloud-config.json", "oneOf": [ { "allOf": [ @@ -10,7 +10,7 @@ "enum": ["22.2", "v1"] } }, - "$ref": "https://raw.githubusercontent.com/canonical/cloud-init/main/cloudinit/config/schema-cloud-config-v1.json" + "$ref": "./schema-cloud-config-v1.json" } ] } diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py index 032dcfa4cdb..ecd8ead57c7 100644 --- a/tests/unittests/config/test_schema.py +++ b/tests/unittests/config/test_schema.py @@ -4,7 +4,10 @@ import importlib import inspect import itertools +import json import logging +import os +import re import sys from copy import copy from pathlib import Path @@ -16,12 +19,14 @@ from 
cloudinit.config.schema import ( CLOUD_CONFIG_HEADER, + VERSIONED_USERDATA_SCHEMA_FILE, MetaSchema, SchemaValidationError, annotated_cloudconfig_file, get_jsonschema_validator, get_meta_doc, get_schema, + get_schema_dir, load_doc, main, validate_cloudconfig_file, @@ -31,7 +36,7 @@ from cloudinit.distros import OSFAMILIES from cloudinit.safeyaml import load, load_with_marks from cloudinit.settings import FREQUENCIES -from cloudinit.util import write_file +from cloudinit.util import load_file, write_file from tests.unittests.helpers import ( CiTestCase, cloud_init_project_dir, @@ -90,6 +95,37 @@ def get_module_variable(var_name) -> dict: return schemas +class TestVersionedSchemas: + @pytest.mark.parametrize( + "schema,error_msg", + ( + ({}, None), + ({"version": "v1"}, None), + ({"version": "v2"}, re.escape("{'version': 'v2'} is not valid")), + ), + ) + def test_versioned_cloud_config_schema_is_valid_json( + self, schema, error_msg + ): + version_schemafile = os.path.join( + get_schema_dir(), VERSIONED_USERDATA_SCHEMA_FILE + ) + version_schema = json.loads(load_file(version_schemafile)) + # To avoid JSON resolver trying to pull the reference from our + # upstream raw file in github. 
+ version_schema["$id"] = f"file://{version_schemafile}" + if error_msg: + with pytest.raises(SchemaValidationError) as context_mgr: + validate_cloudconfig_schema( + schema, schema=version_schema, strict=True + ) + assert error_msg in str(context_mgr.value) + else: + validate_cloudconfig_schema( + schema, schema=version_schema, strict=True + ) + + class TestGetSchema: def test_static_schema_file_is_valid(self, caplog): with caplog.at_level(logging.WARNING): From 5bde6f00706bfde825edb1b3ee7e4e426a9e9bc2 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Fri, 13 May 2022 12:14:36 -0600 Subject: [PATCH 3/6] tests: add failure case scoped at cloud-config-schema-1 --- tests/unittests/config/test_schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py index ecd8ead57c7..3628ef19410 100644 --- a/tests/unittests/config/test_schema.py +++ b/tests/unittests/config/test_schema.py @@ -101,7 +101,9 @@ class TestVersionedSchemas: ( ({}, None), ({"version": "v1"}, None), - ({"version": "v2"}, re.escape("{'version': 'v2'} is not valid")), + ({"version": "v2"}, "is not valid"), + ({"version": "v1", "final_message": -1}, "is not valid"), + ({"version": "v1", "final_message": "some msg"}, None), ), ) def test_versioned_cloud_config_schema_is_valid_json( From 2665793ce7724f86caecf4587e9a92d3d9d5abfe Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Sat, 14 May 2022 22:21:29 -0600 Subject: [PATCH 4/6] tests: adapt relative $ref links for jsonschema 2.6.0 --- .../schemas/versions.schema.cloud-config.json | 6 +-- tests/unittests/config/test_schema.py | 40 +++++++++++++++---- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/cloudinit/config/schemas/versions.schema.cloud-config.json b/cloudinit/config/schemas/versions.schema.cloud-config.json index 7f01dfe992c..4ff3b4d159b 100644 --- a/cloudinit/config/schemas/versions.schema.cloud-config.json +++ 
b/cloudinit/config/schemas/versions.schema.cloud-config.json @@ -9,9 +9,9 @@ "version": { "enum": ["22.2", "v1"] } - }, - "$ref": "./schema-cloud-config-v1.json" - } + } + }, + {"$ref": "./schema-cloud-config-v1.json"} ] } ] diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py index 3628ef19410..c75b7227099 100644 --- a/tests/unittests/config/test_schema.py +++ b/tests/unittests/config/test_schema.py @@ -7,14 +7,14 @@ import json import logging import os -import re import sys -from copy import copy +from copy import copy, deepcopy from pathlib import Path from textwrap import dedent from types import ModuleType from typing import List +import jsonschema import pytest from cloudinit.config.schema import ( @@ -96,6 +96,16 @@ def get_module_variable(var_name) -> dict: class TestVersionedSchemas: + def _relative_ref_to_local_file_path(self, source_schema): + """Replace known relative ref URLs with full file path.""" + # jsonschema 2.6.0 doesn't support relative URLs in $refs (bionic) + full_path_schema = deepcopy(source_schema) + relative_ref = full_path_schema["oneOf"][0]["allOf"][1]["$ref"] + full_local_filepath = get_schema_dir() + relative_ref[1:] + file_ref = f"file://{full_local_filepath}" + full_path_schema["oneOf"][0]["allOf"][1]["$ref"] = file_ref + return full_path_schema + @pytest.mark.parametrize( "schema,error_msg", ( @@ -118,14 +128,30 @@ def test_versioned_cloud_config_schema_is_valid_json( version_schema["$id"] = f"file://{version_schemafile}" if error_msg: with pytest.raises(SchemaValidationError) as context_mgr: + try: + validate_cloudconfig_schema( + schema, schema=version_schema, strict=True + ) + except jsonschema.exceptions.RefResolutionError: + full_path_schema = self._relative_ref_to_local_file_path( + version_schema + ) + validate_cloudconfig_schema( + schema, schema=full_path_schema, strict=True + ) + assert error_msg in str(context_mgr.value) + else: + try: validate_cloudconfig_schema( schema, 
schema=version_schema, strict=True ) - assert error_msg in str(context_mgr.value) - else: - validate_cloudconfig_schema( - schema, schema=version_schema, strict=True - ) + except jsonschema.exceptions.RefResolutionError: + full_path_schema = self._relative_ref_to_local_file_path( + version_schema + ) + validate_cloudconfig_schema( + schema, schema=full_path_schema, strict=True + ) class TestGetSchema: From a0a7623452616bea71190825c2324bab26156ae3 Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Mon, 16 May 2022 11:41:20 -0600 Subject: [PATCH 5/6] tests: report unexpected warning content when assertion fails --- tests/integration_tests/util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/util.py b/tests/integration_tests/util.py index 2052d7987e7..ec6b1434743 100644 --- a/tests/integration_tests/util.py +++ b/tests/integration_tests/util.py @@ -75,7 +75,10 @@ def verify_clean_log(log): for traceback_text in traceback_texts: expected_tracebacks += log.count(traceback_text) - assert warning_count == expected_warnings + assert warning_count == expected_warnings, ( + f"Unexpected warning count != {expected_warnings}. Found: " + f"{re.findall('WARNING.*', log)}" + ) assert traceback_count == expected_tracebacks From 9909fd27a602cbd2b932ffda37186d644bcef0fe Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Mon, 16 May 2022 11:59:50 -0600 Subject: [PATCH 6/6] packaging: add cloudinit.config.schemas --- cloudinit/config/schemas/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 cloudinit/config/schemas/__init__.py diff --git a/cloudinit/config/schemas/__init__.py b/cloudinit/config/schemas/__init__.py new file mode 100644 index 00000000000..e69de29bb2d