Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions hed/errors/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class HedExceptions:
HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID'

SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix'
SCHEMA_DUPLICATE_LIBRARY = "SCHEMA_LIBRARY_INVALID"
BAD_COLUMN_NAMES = 'BAD_COLUMN_NAMES'


Expand Down
23 changes: 18 additions & 5 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,19 @@ def library(self):

Returns:
str: Library name if any.

"""
return self.header_attributes.get(constants.LIBRARY_ATTRIBUTE, "")

def can_save(self):
    """ Report whether this schema is allowed to be written out.

    A schema that was loaded by merging several library schemas cannot be saved.
    Such a schema is recognised here by a comma in its library name.

    Returns:
        bool: True if this schema can be saved.
    """
    library_name = self.library
    # No library name at all means there is nothing merged to worry about.
    if not library_name:
        return True
    return "," not in library_name

@property
def with_standard(self):
""" The version of the base schema this is extended from, if it exists..
Expand Down Expand Up @@ -738,10 +747,14 @@ def _get_attributes_for_section(self, key_class):
def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
# Add the InLibrary attribute to any library schemas as they are loaded
# These are later removed when they are saved out, if saving unmerged
if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
# only add it if not already present - This is a rare case
if not new_entry.has_attribute(HedKey.InLibrary):
new_entry._set_attribute_value(HedKey.InLibrary, self.library)
# if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
# # only add it if not already present - This is a rare case
# Todo ian: I think this should be moved up one level for parity with the other loading changes
# .library will be updated to potentially be a list
# Cannot save schema if .library is a list
#
# if not new_entry.has_attribute(HedKey.InLibrary):
# new_entry._set_attribute_value(HedKey.InLibrary, self.library)

section = self._sections[key_class]
return section._add_to_dict(long_tag_name, new_entry)
Expand Down
102 changes: 88 additions & 14 deletions hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,21 @@
from hed.schema.schema_io import schema_util
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.schema_validation_util import validate_version_string
from collections import defaultdict


MAX_MEMORY_CACHE = 20
MAX_MEMORY_CACHE = 40


def from_string(schema_string, schema_format=".xml", schema_namespace=None):
def from_string(schema_string, schema_format=".xml", schema_namespace=None, schema=None):
""" Create a schema from the given string.

Parameters:
schema_string (str): An XML or mediawiki file as a single long string.
schema_format (str): The schema format of the source schema string.
schema_namespace (str, None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value as the schema being loaded.

Returns:
(HedSchema): The loaded schema.
Expand All @@ -39,9 +42,9 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None):
filename=schema_string)

if schema_format.endswith(".xml"):
hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string)
hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema)
elif schema_format.endswith(".mediawiki"):
hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string)
hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema)
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format)

Expand All @@ -51,12 +54,14 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None):
return hed_schema


def load_schema(hed_path=None, schema_namespace=None):
def load_schema(hed_path=None, schema_namespace=None, schema=None):
""" Load a schema from the given file or URL path.

Parameters:
hed_path (str or None): A filepath or url to open a schema from.
schema_namespace (str or None): The name_prefix all tags in this schema will accept.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value as the schema being loaded.

Returns:
HedSchema: The loaded schema.
Expand All @@ -77,9 +82,9 @@ def load_schema(hed_path=None, schema_namespace=None):
file_as_string = schema_util.url_to_string(hed_path)
hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1])
elif hed_path.lower().endswith(".xml"):
hed_schema = SchemaLoaderXML.load(hed_path)
hed_schema = SchemaLoaderXML.load(hed_path, schema=schema)
elif hed_path.lower().endswith(".mediawiki"):
hed_schema = SchemaLoaderWiki.load(hed_path)
hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema)
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=hed_path)

Expand Down Expand Up @@ -111,7 +116,11 @@ def _load_schema_version(xml_version=None, xml_folder=None):
""" Return specified version or latest if not specified.

Parameters:
xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_]X.Y.Z'.
xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_][X.Y.Z]'
Further versions can be added comma separated after the version number/library name.
e.g. "lib:library,otherlibrary" will load "library" and "otherlibrary" into "lib:"
The schema namespace must be the same and not repeated if loading multiple merged schemas.

xml_folder (str): Path to a folder containing schema.

Returns:
Expand All @@ -124,10 +133,44 @@ def _load_schema_version(xml_version=None, xml_folder=None):
- The prefix is invalid
"""
schema_namespace = ""
library_name = None
if xml_version:
if ":" in xml_version:
schema_namespace, _, xml_version = xml_version.partition(":")

if xml_version:
xml_versions = xml_version.split(",")
# Add a blank entry if we have no xml version
else:
xml_versions = [""]

first_schema = _load_schema_version_sub(schema_namespace, xml_versions[0], xml_folder=xml_folder)
for version in xml_versions[1:]:
_load_schema_version_sub(schema_namespace, version, xml_folder=xml_folder, schema=first_schema)
return first_schema


def _load_schema_version_sub(schema_namespace="", xml_version=None, xml_folder=None, schema=None):
""" Return specified version or latest if not specified.

Parameters:
xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_][X.Y.Z]'

xml_folder (str): Path to a folder containing schema.
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value as the schema being loaded.

Returns:
HedSchema: The requested HedSchema object.

:raises HedFileError:
- The xml_version is not valid.
- The specified version cannot be found or loaded
- Other fatal errors loading the schema (These are unlikely if you are not editing them locally)
- The prefix is invalid
"""
library_name = None

if xml_version:
if "_" in xml_version:
library_name, _, xml_version = xml_version.rpartition("_")
elif validate_version_string(xml_version):
Expand All @@ -138,7 +181,7 @@ def _load_schema_version(xml_version=None, xml_folder=None):
if not final_hed_xml_file:
hed_cache.cache_local_versions(xml_folder)
final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder)
hed_schema = load_schema(final_hed_xml_file)
hed_schema = load_schema(final_hed_xml_file, schema=schema)
except HedFileError as e:
if e.code == HedExceptions.FILE_NOT_FOUND:
hed_cache.cache_xml_versions(cache_folder=xml_folder)
Expand All @@ -147,7 +190,7 @@ def _load_schema_version(xml_version=None, xml_folder=None):
raise HedFileError(HedExceptions.FILE_NOT_FOUND,
f"HED version '{xml_version}' not found in cache: {hed_cache.get_cache_directory()}",
filename=xml_folder)
hed_schema = load_schema(final_hed_xml_file)
hed_schema = load_schema(final_hed_xml_file, schema=schema)
else:
raise e

Expand All @@ -158,14 +201,14 @@ def _load_schema_version(xml_version=None, xml_folder=None):


def load_schema_version(xml_version=None, xml_folder=None):
""" Return a HedSchema or HedSchemaGroup extracted from xml_version field.
""" Return a HedSchema or HedSchemaGroup extracted from xml_version

Parameters:
xml_version (str or list or None): List or str specifying which official HED schemas to use.
An empty string returns the latest version
A json str format is also supported,
based on the output of HedSchema.get_formatted_version
Basic format: '[schema_namespace:][library_name_]X.Y.Z'.
Basic format: '[schema_namespace:][library_name_][X.Y.Z]'.
xml_folder (str): Path to a folder containing schema.

Returns:
Expand All @@ -185,10 +228,41 @@ def load_schema_version(xml_version=None, xml_folder=None):
except json.decoder.JSONDecodeError as e:
raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e
if xml_version and isinstance(xml_version, list):
schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_version]
xml_versions = parse_version_list(xml_version)
schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_versions.values()]
if len(schemas) == 1:
return schemas[0]

return HedSchemaGroup(schemas)
else:
return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder)


def parse_version_list(xml_version_list):
    """ Group a list of schema version strings by their namespace prefix.

    All versions that share a namespace are joined with commas (no spaces) into one string.
    When the namespace is not empty it is put back at the front of the joined string,
    followed by a colon.

    e.g. ["score", "testlib"] will return {"": "score,testlib"}
    e.g. ["score", "testlib", "ol:otherlib"] will return {"": "score,testlib", "ol": "ol:otherlib"}

    Parameters:
        xml_version_list (list): List of str specifying which hed schemas to use.
                                 Each entry has the form '[schema_namespace:]version'.
                                 A None entry is treated as an empty version string.

    Returns:
        dict: Maps each namespace (without the colon) to its combined version string.
              Keys appear in the order the namespaces were first seen.

    :raises HedFileError:
        - The same version string is listed twice under one namespace.
    """
    versions_by_namespace = defaultdict(list)
    for version in xml_version_list:
        schema_namespace = ""
        if version and ":" in version:
            # Only the first colon separates the namespace from the version.
            schema_namespace, _, version = version.partition(":")

        if version is None:
            version = ""

        already_listed = versions_by_namespace[schema_namespace]
        if version in already_listed:
            raise HedFileError(
                HedExceptions.SCHEMA_DUPLICATE_LIBRARY,
                f"Attempting to load the same library '{version}' twice: {already_listed}",
                filename=None)
        already_listed.append(version)

    combined_versions = {}
    for schema_namespace, versions in versions_by_namespace.items():
        joined = ",".join(versions)
        combined_versions[schema_namespace] = f"{schema_namespace}:{joined}" if schema_namespace else joined

    return combined_versions
2 changes: 1 addition & 1 deletion hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def in_library_check(hed_schema, tag_entry, attribute_name):
issues = []

library = tag_entry.attributes.get(attribute_name, "")
if hed_schema.library != library:
if library not in hed_schema.library.split(","):
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
tag_entry.name,
library)
Expand Down
52 changes: 43 additions & 9 deletions hed/schema/schema_io/base2schema.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import copy
from hed.errors.exceptions import HedFileError, HedExceptions
from hed.schema import HedSchema
from hed.schema.hed_schema_constants import HedKey
from abc import abstractmethod, ABC
from hed.schema import schema_validation_util
from hed.schema import hed_schema_constants


class SchemaLoader(ABC):
Expand All @@ -12,20 +14,21 @@ class SchemaLoader(ABC):

SchemaLoaderXML(filename) will load just the header_attributes
"""
def __init__(self, filename, schema_as_string=None):
def __init__(self, filename, schema_as_string=None, schema=None):
"""Loads the given schema from one of the two parameters.

Parameters:
filename(str or None): A valid filepath or None
schema_as_string(str or None): A full schema as text or None
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value as the schema being loaded.
"""
if schema_as_string and filename:
raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.",
filename)

self.filename = filename
self.schema_as_string = schema_as_string

self.appending_to_schema = False
try:
self.input_data = self._open_file()
except OSError as e:
Expand All @@ -34,11 +37,28 @@ def __init__(self, filename, schema_as_string=None):
raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), filename)
except ValueError as e:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), filename)

self._schema = HedSchema()
self._schema.filename = filename

# self._schema.filename = filename
hed_attributes = self._get_header_attributes(self.input_data)
schema_validation_util.validate_attributes(hed_attributes, filename=self.filename)

withStandard = hed_attributes.get(hed_schema_constants.WITH_STANDARD_ATTRIBUTE, "")
self.library = hed_attributes.get(hed_schema_constants.LIBRARY_ATTRIBUTE, "")
if not schema:
self._schema = HedSchema()
else:
self._schema = schema
self.appending_to_schema = True
if not self._schema.with_standard:
raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_PREFIX,
"Trying to load multiple normal schemas as a merged one with the same namespace. "
"Ensure schemas have the withStandard header attribute set",
self.filename)
elif withStandard != self._schema.with_standard:
raise HedFileError(HedExceptions.BAD_WITH_STANDARD_VERSION,
"When merging two schemas without a schema namespace, you they must have the same withStandard value.", self.filename)
hed_attributes[hed_schema_constants.LIBRARY_ATTRIBUTE] = self._schema.library + f",{self.library}"
self._schema.filename = filename
self._schema.header_attributes = hed_attributes
self._loading_merged = False

Expand All @@ -48,16 +68,19 @@ def schema(self):
return self._schema

@classmethod
def load(cls, filename=None, schema_as_string=None):
def load(cls, filename=None, schema_as_string=None, schema=None):
""" Loads and returns the schema, including partnered schema if applicable.

Parameters:
filename(str or None): A valid filepath or None
schema_as_string(str or None): A full schema as text or None
schema(HedSchema or None): A hed schema to merge this new file into
It must be a with-standard schema with the same withStandard value as the schema being loaded.

Returns:
schema(HedSchema): The new schema
"""
loader = cls(filename, schema_as_string)
loader = cls(filename, schema_as_string, schema)
return loader._load()

def _load(self):
Expand All @@ -68,7 +91,7 @@ def _load(self):
"""
self._loading_merged = True
# Do a full load of the standard schema if this is a partnered schema
if self._schema.with_standard and not self._schema.merged:
if not self.appending_to_schema and self._schema.with_standard and not self._schema.merged:
from hed.schema.hed_schema_io import load_schema_version
saved_attr = self._schema.header_attributes
try:
Expand Down Expand Up @@ -102,3 +125,14 @@ def _get_header_attributes(self, input_data):
def _parse_data(self):
"""Puts the input data into the new schema"""
pass

def _add_to_dict_base(self, entry, key_class):
    """ Add an entry to the schema being built, tagging it with InLibrary when required.

    Parameters:
        entry: The schema entry to add.  It must provide has_attribute, _set_attribute_value and name.
        key_class: The section key handed through to the schema's _add_tag_to_dict.

    Returns:
        None if the entry was skipped, otherwise the result of the schema's _add_tag_to_dict.
    """
    target_schema = self._schema
    lacks_library_tag = not entry.has_attribute(HedKey.InLibrary)

    # When appending to an existing merged schema, entries without InLibrary are not added.
    # NOTE(review): presumably these are base-schema entries the target already holds - confirm.
    if lacks_library_tag and self.appending_to_schema and target_schema.merged:
        return None

    # Tag the entry unless the schema is both with-standard and merged.
    should_tag = self.library and not (target_schema.with_standard and target_schema.merged)
    if should_tag and lacks_library_tag:
        # only add it if not already present - This is a rare case
        entry._set_attribute_value(HedKey.InLibrary, self.library)

    return target_schema._add_tag_to_dict(entry.name, entry, key_class)
5 changes: 5 additions & 0 deletions hed/schema/schema_io/schema2base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Baseclass for mediawiki/xml writers"""
from hed.schema.hed_schema_constants import HedSectionKey, HedKey
from hed.errors.exceptions import HedFileError, HedExceptions


class Schema2Base:
Expand Down Expand Up @@ -29,6 +30,10 @@ def process_schema(cls, hed_schema, save_merged=False):
Varies based on inherited class

"""
if not hed_schema.can_save():
raise HedFileError(HedExceptions.SCHEMA_LIBRARY_INVALID,
"Cannot save a schema merged from multiple library schemas",
hed_schema.filename)
saver = cls()
saver._save_lib = False
saver._save_base = False
Expand Down
Loading