diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index b5dfb6060..e5e51d7c5 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -227,6 +227,8 @@ def val_warning_capitalization(tag): @hed_tag_error(ValidationErrors.UNITS_MISSING, default_severity=ErrorSeverity.WARNING) def val_warning_default_units_used(tag, default_unit): + if default_unit is None: + return f"No unit specified on - '{tag}'. Multiple default values exist and cannot be inferred" return f"No unit specified. Using '{default_unit}' as the default - '{tag}'" diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 9d470002b..7e579815f 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -328,7 +328,7 @@ def _calculate_to_canonical_forms(self, hed_schema): return tag_issues def get_stripped_unit_value(self): - """ Return the extension portion without units. + """ Return the extension divided into value and units, if the units are valid. Returns: stripped_unit_value (str): The extension portion with the units removed. @@ -345,6 +345,32 @@ def get_stripped_unit_value(self): return self.extension, None + def value_as_default_unit(self): + """ Returns the value converted to default units if possible. + + Returns None if the units are invalid.(No default unit or invalid) + + Returns: + value (float or None): The extension value as default units. + If there are not default units, returns None. 
+ + Examples: + 'Duration/300 ms' will return .3 + + """ + tag_unit_classes = self.unit_classes + value, _, units = self.extension.rpartition(" ") + if not value: + stripped_value = units + unit = self.default_unit + else: + stripped_value, unit = self._get_tag_units_portion(tag_unit_classes) + + if stripped_value and unit is not None: + if unit.attributes.get("conversionFactor"): + conversion_factor = unit.attributes.get("conversionFactor", 1.0) + return float(stripped_value) * float(conversion_factor) + @property def unit_classes(self): """ Return a dict of all the unit classes this tag accepts. @@ -476,20 +502,19 @@ def get_tag_unit_class_units(self): return units - def get_unit_class_default_unit(self): + @property + def default_unit(self): """ Get the default unit class unit for this tag. + Only a tag with a single unit class can have default units. Returns: - str: The default unit class unit associated with the specific tag or an empty string. - + unit(UnitEntry or None): the default unit entry for this tag, or None """ - default_unit = '' unit_classes = self.unit_classes.values() - if unit_classes: + if len(unit_classes) == 1: first_unit_class_entry = list(unit_classes)[0] default_unit = first_unit_class_entry.has_attribute(HedKey.DefaultUnits, return_value=True) - - return default_unit + return first_unit_class_entry.units.get(default_unit, None) def base_tag_has_attribute(self, tag_attribute): """ Check to see if the tag has a specific attribute. @@ -536,8 +561,9 @@ def _get_tag_units_portion(self, tag_unit_classes): tag_unit_classes (dict): Dictionary of valid UnitClassEntry objects for this tag. Returns: - stripped_value (str): The value with the units removed. - + stripped_value (str or None): The value with the units removed. + This is filled in if there are no units as well. 
+ unit (UnitEntry or None): The matching unit entry if one is found """ value, _, units = self.extension.rpartition(" ") if not units: @@ -548,12 +574,12 @@ def _get_tag_units_portion(self, tag_unit_classes): possible_match = self._find_modifier_unit_entry(units, all_valid_unit_permutations) if possible_match and not possible_match.has_attribute(HedKey.UnitPrefix): - return value, units + return value, possible_match # Repeat the above, but as a prefix possible_match = self._find_modifier_unit_entry(value, all_valid_unit_permutations) if possible_match and possible_match.has_attribute(HedKey.UnitPrefix): - return units, value + return units, possible_match return None, None diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 2ea635050..797bd2460 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -2,6 +2,8 @@ from hed.schema.hed_schema_constants import HedKey import inflect +import copy + pluralize = inflect.engine() pluralize.defnoun("hertz", "hertz") @@ -133,6 +135,20 @@ def get_known_attributes(self): return {key: value for key, value in self.attributes.items() if not self._unknown_attributes or key not in self._unknown_attributes} + # Give a default deep copy that excludes the _section attribute + def __deepcopy__(self, memo): + # Create a new instance + new_obj = self.__class__.__new__(self.__class__) + memo[id(self)] = new_obj # Add the new object to the memo to handle cyclic references + + for k, v in self.__dict__.items(): + if k != "_section": + new_val = copy.deepcopy(v, memo) + else: + new_val = v + setattr(new_obj, k, new_val) + return new_obj + class UnitClassEntry(HedSchemaEntry): """ A single unit class entry in the HedSchema. 
""" @@ -169,7 +185,11 @@ def finalize_entry(self, schema): for derived_unit in new_derivative_units: derivative_units[derived_unit] = unit_entry for modifier in unit_entry.unit_modifiers: - derivative_units[modifier.name + derived_unit] = unit_entry + new_entry = copy.deepcopy(unit_entry) + derivative_units[modifier.name + derived_unit] = new_entry + new_entry.unit_class_name = derived_unit + new_entry.attributes["conversionFactor"] = new_entry.get_conversion_factor(modifier_entry=modifier) + self.derivative_units = derivative_units def __eq__(self, other): @@ -182,7 +202,6 @@ def __eq__(self, other): class UnitEntry(HedSchemaEntry): """ A single unit entry with modifiers in the HedSchema. """ - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.unit_class_name = None @@ -197,6 +216,19 @@ def finalize_entry(self, schema): """ self.unit_modifiers = schema._get_modifiers_for_unit(self.name) + def get_conversion_factor(self, modifier_entry): + """Returns the conversion factor from combining this unit with the specified modifier + + Parameters: + modifier_entry (HedSchemaEntry): The modifier to apply + + Returns: + conversion_factor(float): Defaults to 1.0 conversion factor if not present on unit and modifier. + """ + base_factor = float(self.attributes.get("conversionFactor", "1.0").replace("^", "e")) + modifier_factor = float(modifier_entry.attributes.get("conversionFactor", "1.0").replace("^", "e")) + return base_factor * modifier_factor + class HedTagEntry(HedSchemaEntry): """ A single tag entry in the HedSchema. 
""" diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py index 586d823da..f0d585a70 100644 --- a/hed/validator/tag_validator.py +++ b/hed/validator/tag_validator.py @@ -307,7 +307,7 @@ def _check_units(self, original_tag, bad_units, report_as): validation_issue = ErrorHandler.format_error(ValidationErrors.UNITS_INVALID, tag=report_as, units=tag_unit_class_units) else: - default_unit = original_tag.get_unit_class_default_unit() + default_unit = original_tag.default_unit validation_issue = ErrorHandler.format_error(ValidationErrors.UNITS_MISSING, tag=report_as, default_unit=default_unit) return validation_issue @@ -378,26 +378,6 @@ def check_tag_requires_child(self, original_tag): tag=original_tag) return validation_issues - def check_tag_unit_class_units_exist(self, original_tag): - """ Report warning if tag has a unit class tag with no units. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - - Returns: - list: Validation issues. Each issue is a dictionary. - - """ - validation_issues = [] - if original_tag.is_unit_class_tag(): - tag_unit_values = original_tag.extension - if tag_validator_util.validate_numeric_value_class(tag_unit_values): - default_unit = original_tag.get_unit_class_default_unit() - validation_issues += ErrorHandler.format_error(ValidationErrors.UNITS_MISSING, - tag=original_tag, - default_unit=default_unit) - return validation_issues - def check_for_invalid_extension_chars(self, original_tag): """Report invalid characters in extension/value. 
diff --git a/readthedocs.yml b/readthedocs.yml index 3a0eed7e6..bf5d7274d 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -5,7 +5,9 @@ formats: - pdf build: - image: latest + os: "ubuntu-22.04" + tools: + python: "3.8" # Build documentation in the docs/ directory with Sphinx sphinx: @@ -15,7 +17,6 @@ sphinx: python: - version: 3.8 install: - requirements: docs/requirements.txt system_packages: true diff --git a/tests/models/test_hed_tag.py b/tests/models/test_hed_tag.py index 50b1e1587..97e0ee247 100644 --- a/tests/models/test_hed_tag.py +++ b/tests/models/test_hed_tag.py @@ -1,10 +1,16 @@ from hed.models.hed_tag import HedTag from tests.validator.test_tag_validator_base import TestHedBase from hed.schema import HedKey +from hed import load_schema_version + +from tests.schema import util_create_schemas class TestValidatorUtilityFunctions(TestHedBase): - schema_file = '../data/schema_tests/HED8.0.0t.xml' + + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version("8.2.0") def test_if_tag_exists(self): valid_tag1 = HedTag('Left-handed', hed_schema=self.hed_schema) @@ -37,7 +43,9 @@ def test_if_tag_exists(self): class TestSchemaUtilityFunctions(TestHedBase): - schema_file = '../data/schema_tests/HED8.0.0t.xml' + @classmethod + def setUpClass(cls): + cls.hed_schema = load_schema_version("8.2.0") def test_correctly_determine_tag_takes_value(self): value_tag1 = HedTag('Distance/35 px', hed_schema=self.hed_schema) @@ -65,14 +73,14 @@ def test_should_determine_default_unit(self): # schema=self.schema) no_unit_class_tag = HedTag('RGB-red/0.5', hed_schema=self.hed_schema) no_value_tag = HedTag('Black', hed_schema=self.hed_schema) - unit_class_tag1_result = unit_class_tag1.get_unit_class_default_unit() - # unit_class_tag2_result = unit_class_tag2.get_unit_class_default_unit() - no_unit_class_tag_result = no_unit_class_tag.get_unit_class_default_unit() - no_value_tag_result = no_value_tag.get_unit_class_default_unit() - 
self.assertEqual(unit_class_tag1_result, 's') + unit_class_tag1_result = unit_class_tag1.default_unit + # unit_class_tag2_result = unit_class_tag2.default_unit + no_unit_class_tag_result = no_unit_class_tag.default_unit + no_value_tag_result = no_value_tag.default_unit + self.assertEqual(unit_class_tag1_result.name, 's') # self.assertEqual(unit_class_tag2_result, '$') - self.assertEqual(no_unit_class_tag_result, '') - self.assertEqual(no_value_tag_result, '') + self.assertEqual(no_unit_class_tag_result, None) + self.assertEqual(no_value_tag_result, None) def test_correctly_determine_tag_unit_classes(self): unit_class_tag1 = HedTag('distance/35 px', hed_schema=self.hed_schema) @@ -97,13 +105,14 @@ def test_determine_tags_legal_units(self): unit_class_tag1_result = unit_class_tag1.get_tag_unit_class_units() # unit_class_tag2_result = unit_class_tag2.get_tag_unit_class_units() no_unit_class_tag_result = no_unit_class_tag.get_tag_unit_class_units() - self.assertCountEqual(unit_class_tag1_result, [ + self.assertCountEqual(sorted(unit_class_tag1_result), sorted([ 'inch', 'm', 'foot', 'metre', + 'meter', 'mile', - ]) + ])) # self.assertCountEqual(unit_class_tag2_result, [ # 'dollar', # '$', @@ -160,4 +169,20 @@ def test_determine_allows_extensions(self): self.assertEqual(extension_tag1_result, True) self.assertEqual(no_extension_tag1_result, False) self.assertEqual(no_extension_tag2_result, False) - self.assertEqual(no_extension_tag3_result, False) \ No newline at end of file + self.assertEqual(no_extension_tag3_result, False) + + def test_get_as_default_units(self): + tag = HedTag("Duration/300 ms", hed_schema=self.hed_schema) + self.assertAlmostEqual(0.3, tag.value_as_default_unit()) + + tag2 = HedTag("Duration/300", hed_schema=self.hed_schema) + self.assertAlmostEqual(300, tag2.value_as_default_unit()) + + tag3 = HedTag("Duration/300 m", hed_schema=self.hed_schema) + self.assertEqual(None, tag3.value_as_default_unit()) + + tag4 = HedTag("IntensityTakesValue/300", 
hed_schema=util_create_schemas.load_schema_intensity()) + self.assertEqual(300, tag4.value_as_default_unit()) + + tag5 = HedTag("IntensityTakesValue/300 cd", hed_schema=util_create_schemas.load_schema_intensity()) + self.assertEqual(None, tag5.value_as_default_unit()) diff --git a/tests/schema/test_schema_compare.py b/tests/schema/test_schema_compare.py index 6356f67e8..f6b1ceed1 100644 --- a/tests/schema/test_schema_compare.py +++ b/tests/schema/test_schema_compare.py @@ -1,54 +1,19 @@ import unittest import json -from hed.schema import HedKey, HedSectionKey, from_string +from hed.schema import HedKey, HedSectionKey from hed.schema.schema_compare import compare_schemas, find_matching_tags, \ _pretty_print_diff_all, _pretty_print_missing_all, compare_differences from hed import load_schema_version +from . import util_create_schemas -class TestSchemaComparison(unittest.TestCase): - library_schema_start = """HED library="testcomparison" version="1.1.0" withStandard="8.2.0" unmerged="true" - -'''Prologue''' - -!# start schema - -""" - - library_schema_end = """ -!# end schema - -!# end hed - """ - - def _get_test_schema(self, node_lines): - library_schema_string = self.library_schema_start + "\n".join(node_lines) + self.library_schema_end - test_schema = from_string(library_schema_string, ".mediawiki") - - return test_schema - - def load_schema1(self): - test_nodes = ["'''TestNode''' [This is a simple test node]\n", - " *TestNode2", - " *TestNode3", - " *TestNode4" - ] - return self._get_test_schema(test_nodes) - - def load_schema2(self): - test_nodes = ["'''TestNode''' [This is a simple test node]\n", - " *TestNode2", - " **TestNode3", - " *TestNode5" - ] - - return self._get_test_schema(test_nodes) +class TestSchemaComparison(unittest.TestCase): def test_find_matching_tags(self): # create entries for schema1 - schema1 = self.load_schema1() - schema2 = self.load_schema2() + schema1 = util_create_schemas.load_schema1() + schema2 = util_create_schemas.load_schema2() 
result = find_matching_tags(schema1, schema2) # Check if the result is correct @@ -82,8 +47,8 @@ def test_find_matching_tags(self): self.assertNotIn("summary", json_style_dict_no_summary) def test_compare_schemas(self): - schema1 = self.load_schema1() - schema2 = self.load_schema2() + schema1 = util_create_schemas.load_schema1() + schema2 = util_create_schemas.load_schema2() matches, not_in_schema1, not_in_schema2, unequal_entries = compare_schemas(schema1, schema2) @@ -103,8 +68,8 @@ def test_compare_schemas(self): self.assertIn("TestNode3", unequal_entries[HedSectionKey.Tags]) def test_compare_differences(self): - schema1 = self.load_schema1() - schema2 = self.load_schema2() + schema1 = util_create_schemas.load_schema1() + schema2 = util_create_schemas.load_schema2() not_in_schema1, not_in_schema2, unequal_entries = compare_differences(schema1, schema2) diff --git a/tests/schema/util_create_schemas.py b/tests/schema/util_create_schemas.py new file mode 100644 index 000000000..850d014eb --- /dev/null +++ b/tests/schema/util_create_schemas.py @@ -0,0 +1,47 @@ +from hed.schema import HedKey, HedSectionKey, from_string + + +library_schema_start = """HED library="testcomparison" version="1.1.0" withStandard="8.2.0" unmerged="true" + +'''Prologue''' + +!# start schema + +""" + +library_schema_end = """ +!# end schema + +!# end hed + """ + +def _get_test_schema(node_lines): + library_schema_string = library_schema_start + "\n".join(node_lines) + library_schema_end + test_schema = from_string(library_schema_string, ".mediawiki") + + return test_schema + + +def load_schema1(): + test_nodes = ["'''TestNode''' [This is a simple test node]\n", + " *TestNode2", + " *TestNode3", + " *TestNode4" + ] + return _get_test_schema(test_nodes) + + +def load_schema2(): + test_nodes = ["'''TestNode''' [This is a simple test node]\n", + " *TestNode2", + " **TestNode3", + " *TestNode5" + ] + + return _get_test_schema(test_nodes) + + +def load_schema_intensity(): + test_nodes = 
["'''IntensityTakesValue'''", + " * # {unitClass=intensityUnits}"] + return _get_test_schema(test_nodes) \ No newline at end of file