diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index 1cd0dfa5615..7a6ecf08c74 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -21,6 +21,10 @@ error = partial(error, sys_exit=True) LOG = logging.getLogger(__name__) +VERSIONED_USERDATA_SCHEMA_FILE = "versions.schema.cloud-config.json" +# Bump this file when introducing incompatible schema changes. +# Also add new version definition to versions.schema.cloud-config.json. +USERDATA_SCHEMA_FILE = "schema-cloud-config-v1.json" _YAML_MAP = {True: "true", False: "false", None: "null"} CLOUD_CONFIG_HEADER = b"#cloud-config" SCHEMA_DOC_TMPL = """ @@ -659,11 +663,22 @@ def load_doc(requested_modules: list) -> str: return docs +def get_schema_dir() -> str: + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "schemas") + + def get_schema() -> dict: """Return jsonschema coalesced from all cc_* cloud-config modules.""" - schema_file = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "cloud-init-schema.json" - ) + # Note versions.schema.cloud-config.json is publicly consumed by + # schemastore.org. If we change the location of that file in github, + # we need to provide an updated PR to + # https://github.com/SchemaStore/schemastore. + + # When bumping schema version due to incompatible changes: + # 1. Add a new schema-cloud-config-v#.json + # 2. Change the USERDATA_SCHEMA_FILE to schema-cloud-config-v#.json + # 3. Add the new version definition to versions.schema.cloud-config.json + schema_file = os.path.join(get_schema_dir(), USERDATA_SCHEMA_FILE) full_schema = None try: full_schema = json.loads(load_file(schema_file)) @@ -680,20 +695,6 @@ def get_schema() -> dict: "$schema": "http://json-schema.org/draft-04/schema#", "allOf": [], } - - # TODO( Drop the get_modules loop when all legacy cc_* schema migrates ) - # Supplement base_schema with any legacy modules which still contain a - # "schema" attribute. 
Legacy cc_* modules will be migrated to use the - # store module schema in the composite cloud-init-schema-.json - # and will drop "schema" at that point. - for (_, mod_name) in get_modules().items(): - # All cc_* modules need a "meta" attribute to represent schema defs - (mod_locs, _) = importer.find_module( - mod_name, ["cloudinit.config"], ["schema"] - ) - if mod_locs: - mod = importer.import_module(mod_locs[0]) - full_schema["allOf"].append(mod.schema) return full_schema diff --git a/cloudinit/config/schemas/__init__.py b/cloudinit/config/schemas/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/cloudinit/config/cloud-init-schema.json b/cloudinit/config/schemas/schema-cloud-config-v1.json similarity index 100% rename from cloudinit/config/cloud-init-schema.json rename to cloudinit/config/schemas/schema-cloud-config-v1.json diff --git a/cloudinit/config/schemas/versions.schema.cloud-config.json b/cloudinit/config/schemas/versions.schema.cloud-config.json new file mode 100644 index 00000000000..4ff3b4d159b --- /dev/null +++ b/cloudinit/config/schemas/versions.schema.cloud-config.json @@ -0,0 +1,18 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "$id": "https://raw.githubusercontent.com/canonical/cloud-init/main/cloudinit/config/schemas/versions.schema.cloud-config.json", + "oneOf": [ + { + "allOf": [ + { + "properties": { + "version": { + "enum": ["22.2", "v1"] + } + } + }, + {"$ref": "./schema-cloud-config-v1.json"} + ] + } + ] +} diff --git a/tests/integration_tests/util.py b/tests/integration_tests/util.py index 2052d7987e7..ec6b1434743 100644 --- a/tests/integration_tests/util.py +++ b/tests/integration_tests/util.py @@ -75,7 +75,10 @@ def verify_clean_log(log): for traceback_text in traceback_texts: expected_tracebacks += log.count(traceback_text) - assert warning_count == expected_warnings + assert warning_count == expected_warnings, ( + f"Unexpected warning count != {expected_warnings}. 
Found: " + f"{re.findall('WARNING.*', log)}" + ) assert traceback_count == expected_tracebacks diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py index 43f5a68d9ce..c75b7227099 100644 --- a/tests/unittests/config/test_schema.py +++ b/tests/unittests/config/test_schema.py @@ -4,24 +4,29 @@ import importlib import inspect import itertools +import json import logging +import os import sys -from copy import copy +from copy import copy, deepcopy from pathlib import Path from textwrap import dedent from types import ModuleType from typing import List +import jsonschema import pytest from cloudinit.config.schema import ( CLOUD_CONFIG_HEADER, + VERSIONED_USERDATA_SCHEMA_FILE, MetaSchema, SchemaValidationError, annotated_cloudconfig_file, get_jsonschema_validator, get_meta_doc, get_schema, + get_schema_dir, load_doc, main, validate_cloudconfig_file, @@ -31,7 +36,7 @@ from cloudinit.distros import OSFAMILIES from cloudinit.safeyaml import load, load_with_marks from cloudinit.settings import FREQUENCIES -from cloudinit.util import write_file +from cloudinit.util import load_file, write_file from tests.unittests.helpers import ( CiTestCase, cloud_init_project_dir, @@ -90,6 +95,65 @@ def get_module_variable(var_name) -> dict: return schemas +class TestVersionedSchemas: + def _relative_ref_to_local_file_path(self, source_schema): + """Replace known relative ref URLs with full file path.""" + # jsonschema 2.6.0 doesn't support relative URLs in $refs (bionic) + full_path_schema = deepcopy(source_schema) + relative_ref = full_path_schema["oneOf"][0]["allOf"][1]["$ref"] + full_local_filepath = get_schema_dir() + relative_ref[1:] + file_ref = f"file://{full_local_filepath}" + full_path_schema["oneOf"][0]["allOf"][1]["$ref"] = file_ref + return full_path_schema + + @pytest.mark.parametrize( + "schema,error_msg", + ( + ({}, None), + ({"version": "v1"}, None), + ({"version": "v2"}, "is not valid"), + ({"version": "v1", "final_message": -1}, "is 
not valid"), + ({"version": "v1", "final_message": "some msg"}, None), + ), + ) + def test_versioned_cloud_config_schema_is_valid_json( + self, schema, error_msg + ): + version_schemafile = os.path.join( + get_schema_dir(), VERSIONED_USERDATA_SCHEMA_FILE + ) + version_schema = json.loads(load_file(version_schemafile)) + # To avoid JSON resolver trying to pull the reference from our + # upstream raw file in github. + version_schema["$id"] = f"file://{version_schemafile}" + if error_msg: + with pytest.raises(SchemaValidationError) as context_mgr: + try: + validate_cloudconfig_schema( + schema, schema=version_schema, strict=True + ) + except jsonschema.exceptions.RefResolutionError: + full_path_schema = self._relative_ref_to_local_file_path( + version_schema + ) + validate_cloudconfig_schema( + schema, schema=full_path_schema, strict=True + ) + assert error_msg in str(context_mgr.value) + else: + try: + validate_cloudconfig_schema( + schema, schema=version_schema, strict=True + ) + except jsonschema.exceptions.RefResolutionError: + full_path_schema = self._relative_ref_to_local_file_path( + version_schema + ) + validate_cloudconfig_schema( + schema, schema=full_path_schema, strict=True + ) + + class TestGetSchema: def test_static_schema_file_is_valid(self, caplog): with caplog.at_level(logging.WARNING): @@ -179,15 +243,11 @@ class TestLoadDoc: docs = get_module_variable("__doc__") - # TODO( Drop legacy test when all sub-schemas in cloud-init-schema.json ) @pytest.mark.parametrize( "module_name", - ( - "cc_apt_pipelining", # new style composite schema file - "cc_install_hotplug", # legacy sub-schema defined in module - ), + ("cc_apt_pipelining",), # new style composite schema file ) - def test_report_docs_for_legacy_and_consolidated_schema(self, module_name): + def test_report_docs_consolidated_schema(self, module_name): doc = load_doc([module_name]) assert doc, "Unexpected empty docs for {}".format(module_name) assert self.docs[module_name] == doc @@ -329,7 +389,7 @@ 
def test_validateconfig_schema_of_example(self, schema_id, example): """ schema = get_schema() config_load = load(example) - # cloud-init-schema is permissive of additionalProperties at the + # schema-cloud-config-v1 is permissive of additionalProperties at the # top-level. # To validate specific schemas against known documented examples # we need to only define the specific module schema and supply