From c01f279ee3305d402cadfb922c20562e44020911 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 15 Sep 2025 09:35:28 -0500 Subject: [PATCH 1/2] Corrected the handling of duplicate definitions --- hed/errors/error_messages.py | 18 +++---- hed/errors/error_reporter.py | 4 +- hed/errors/error_types.py | 1 + hed/models/definition_dict.py | 36 +++++++------- hed/models/definition_entry.py | 18 ++++++- pyproject.toml | 8 +++- tests/models/test_definition_dict.py | 15 ++++++ tests/models/test_definition_entry.py | 68 ++++++++++++++++++++++++++- tests/models/test_sidecar.py | 11 +++-- tests/validator/test_def_validator.py | 17 +++---- 10 files changed, 155 insertions(+), 41 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index b6b6f726e..b016aeeb5 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -90,7 +90,7 @@ def val_error_hed_placeholder_out_of_context(tag): @hed_tag_error(ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE, has_sub_tag=False, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID) def val_error_curly_brace_unsupported_here(tag, problem_tag): - return (f"Curly braces are only permitted in sidecars, fully wrapping text in place of a tag. " + return (f"Curly braces are only permitted in metadata (e.g., a BIDS sidecar or NWB MeaningsTable) and must contain a column name. " f"Invalid character '{problem_tag}' in tag '{tag}'") @@ -184,7 +184,7 @@ def val_error_extra_column(column_name): @hed_error(ValidationErrors.SIDECAR_AND_OTHER_COLUMNS) def val_error_sidecar_with_column(column_names): - return f"You cannot use a sidecar and tag or prefix columns at the same time. " \ + return f"You cannot use a column name in curly braces and to designate a tag column. " \ f"Found {column_names}." @@ -193,7 +193,7 @@ def val_error_duplicate_column(column_number, column_name, list_name): if column_name: return f"Found column '{column_name}' at index {column_number} twice in {list_name}." else: - return f"Found column number {column_number} twice in {list_name}. This may indicate a mistake." + return f"Found column number {column_number} twice in {list_name}. This may indicate a mistake." @hed_error(ValidationErrors.DUPLICATE_COLUMN_BETWEEN_SOURCES) @@ -223,13 +223,13 @@ def val_error_extra_slashes_spaces(tag, problem_tag): @hed_error(ValidationErrors.SIDECAR_KEY_MISSING, default_severity=ErrorSeverity.WARNING) def val_error_sidecar_key_missing(invalid_keys, category_keys, column_name): - return f"Category keys {invalid_keys} do not exist in sidecar for column '{column_name}'. Valid keys are: {category_keys}" + return f"Category values {invalid_keys} do not exist in metadata (e.g., BIDS sidecar or NWB meanings) for column '{column_name}'. Valid keys are: {category_keys}" @hed_error(ValidationErrors.TSV_COLUMN_MISSING, actual_code=ValidationErrors.SIDECAR_KEY_MISSING, default_severity=ErrorSeverity.WARNING) def val_error_tsv_column_missing(invalid_keys): - return f"{invalid_keys} used as column references in sidecar but are not columns in the tabular file" + return f"{invalid_keys} used as column references in metadata (e.g., BIDS sidecar or NWB meanings) but are not columns in the tabular file" @hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID, actual_code=ValidationErrors.DEF_EXPAND_INVALID) @@ -307,17 +307,17 @@ def sidecar_error_blank_hed_string(): @hed_error(SidecarErrors.WRONG_HED_DATA_TYPE) def sidecar_error_hed_data_type(expected_type, given_type): - return f"Invalid HED string datatype sidecar. Should be '{expected_type}', but got '{given_type}'" + return f"Invalid datatype in metadata (e.g., a BIDS sidecar or NWB meanings). Should be '{expected_type}', but got '{given_type}'" @hed_error(SidecarErrors.INVALID_POUND_SIGNS_VALUE, actual_code=ValidationErrors.PLACEHOLDER_INVALID) def sidecar_error_invalid_pound_sign_count(pound_sign_count): - return f"There should be exactly one # character in a sidecar string. Found {pound_sign_count}" + return f"There should be exactly one # character in a HED value string. Found {pound_sign_count}" @hed_error(SidecarErrors.INVALID_POUND_SIGNS_CATEGORY, actual_code=ValidationErrors.PLACEHOLDER_INVALID) def sidecar_error_too_many_pound_signs(pound_sign_count): - return f"There should be no # characters in a category sidecar string. Found {pound_sign_count}" + return f"There should be no # characters in an annotation for a category value. Found {pound_sign_count}" @hed_error(SidecarErrors.UNKNOWN_COLUMN_TYPE) @@ -328,7 +328,7 @@ def sidecar_error_unknown_column(column_name): @hed_error(SidecarErrors.SIDECAR_HED_USED, actual_code=ValidationErrors.SIDECAR_INVALID) def sidecar_hed_used(): - return "'HED' is a reserved name and cannot be used as a sidecar except in expected places." + return "'HED' is a reserved name and cannot be used as a metadata column key except in expected places." @hed_error(SidecarErrors.SIDECAR_NA_USED, actual_code=ValidationErrors.SIDECAR_INVALID) diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 3729bb1be..f4b7224b3 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -42,6 +42,7 @@ default_sort_list = [ ErrorContext.CUSTOM_TITLE, ErrorContext.FILE_NAME, + ErrorContext.TABLE_NAME, ErrorContext.SIDECAR_COLUMN_NAME, ErrorContext.SIDECAR_KEY_NAME, ErrorContext.ROW, @@ -744,7 +745,8 @@ def _format_single_context_string(context_type, context, tab_count=0): """ tab_string = tab_count * '\t' error_types = { - ErrorContext.FILE_NAME: f"\nErrors in file '{context}'", + ErrorContext.FILE_NAME: f"\nErrors in file '{context}':", + ErrorContext.TABLE_NAME: f"\nErrors in table '{context}':", ErrorContext.SIDECAR_COLUMN_NAME: f"Column '{context}':", ErrorContext.SIDECAR_KEY_NAME: f"Key: {context}", ErrorContext.ROW: f'Issues in row {context}:', diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 5e8549c58..1c470e7a3 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -21,6 +21,7 @@ class ErrorContext: SCHEMA_SECTION = 'ec_section' SCHEMA_TAG = 'ec_schema_tag' SCHEMA_ATTRIBUTE = 'ec_attribute' + TABLE_NAME = 'ec_table_name' class ValidationErrors: diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 1072beead..1ceeb95d5 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -29,11 +29,11 @@ def __init__(self, def_dicts=None, hed_schema=None): if def_dicts: self.add_definitions(def_dicts, hed_schema) - def add_definitions(self, def_dicts, hed_schema=None): + def add_definitions(self, defs, hed_schema=None): """ Add definitions from dict(s) or strings(s) to this dict. Parameters: - def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts + defs (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts whose definitions should be added. hed_schema (HedSchema or None): Required if passing strings or lists of strings, unused otherwise. @@ -42,23 +42,25 @@ def add_definitions(self, def_dicts, hed_schema=None): Note - You can mix and match types, eg [DefinitionDict, str, list of str] would be valid input. Raises: - TypeError: Bad type passed as def_dicts. + TypeError: Bad type passed as defs. """ - if not isinstance(def_dicts, list): - def_dicts = [def_dicts] - for def_dict in def_dicts: - if isinstance(def_dict, (DefinitionDict, dict)): - self._add_definitions_from_dict(def_dict) - elif isinstance(def_dict, str) and hed_schema: - self.check_for_definitions(HedString(def_dict, hed_schema)) - elif isinstance(def_dict, list) and hed_schema: - for definition in def_dict: - self.check_for_definitions(HedString(definition, hed_schema)) + if not isinstance(defs, list): + defs = [defs] + for definition in defs: + if isinstance(definition, (DefinitionDict, dict)): + self._add_definitions_from_dict(definition) + elif isinstance(definition, str) and hed_schema: + self._issues += self.check_for_definitions(HedString(definition, hed_schema)) + elif isinstance(definition, list) and hed_schema: + for def_item in definition: + self._issues = self.check_for_definitions(HedString(def_item, hed_schema)) else: - raise TypeError(f"Invalid type '{type(def_dict)}' passed to DefinitionDict") + raise TypeError(f"Invalid type '{type(defs)}' passed to DefinitionDict") def _add_definition(self, def_tag, def_value): - if def_tag in self.defs: + if def_tag in self.defs and def_value == self.defs[def_tag]: + return + elif def_tag in self.defs: error_context = self.defs[def_tag].source_context self._issues += ErrorHandler.format_error_from_context(DefinitionErrors.DUPLICATE_DEFINITION, error_context=error_context, def_name=def_tag, actual_error=DefinitionErrors.DUPLICATE_DEFINITION) @@ -69,8 +71,10 @@ def _add_definitions_from_dict(self, def_dict): """ Add the definitions found in the given definition dictionary to this mapper. Parameters: - def_dict (DefinitionDict or dict): DefDict whose definitions should be added. + def_dict (DefinitionDict or dict): Dictionary-like whose definitions should be added. + Note: + Expects DefinitionEntries in the same form as a DefinitionDict """ for def_tag, def_value in def_dict.items(): self._add_definition(def_tag, def_value) diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index 4e9d48c77..81393822c 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -17,7 +17,7 @@ def __init__(self, name, contents, takes_value, source_context): takes_value (bool): If True, expects ONE tag to have a single # sign in it. source_context (list, None): List (stack) of dictionaries giving context for reporting errors. """ - self.name = name + self.name = name.casefold() if contents: contents = contents.copy() contents.sort() @@ -66,3 +66,19 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag def __str__(self): return str(self.contents) + + def __eq__(self, other): + """ Check equality based on name, contents, and takes_value. + + Parameters: + other (DefinitionEntry): Another DefinitionEntry to compare with. + + Returns: + bool: True if name, contents, and takes_value are equal, False otherwise. + """ + if not isinstance(other, DefinitionEntry): + return False + + return (self.name == other.name and + self.contents == other.contents and + self.takes_value == other.takes_value) diff --git a/pyproject.toml b/pyproject.toml index e42504f4d..d770a6972 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,10 @@ keywords = [] # Add keywords here if any classifiers = [ "Programming Language :: Python :: 3", "Operating System :: OS Independent", + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", ] requires-python = ">=3.9" @@ -43,8 +47,10 @@ dependencies = [ ] [project.urls] -"Homepage" = "https://github.com/hed-standard/hed-python/" +"Homepage" = "https://www.hedtags.org/" +"Repo" = "https://github.com/hed-standard/hed-python/" "Bug Tracker" = "https://github.com/hed-standard/hed-python/issues" +"API docs" = "https://www.hedtags.org/hed-python/" [project.optional-dependencies] # Define any optional dependencies here diff --git a/tests/models/test_definition_dict.py b/tests/models/test_definition_dict.py index 7773f6d43..cbde48010 100644 --- a/tests/models/test_definition_dict.py +++ b/tests/models/test_definition_dict.py @@ -147,6 +147,21 @@ def test_altering_definition_contents(self): self.assertNotEqual(hed_string1, hed_string2) + def test_add_definition(self): + # Bad input string + def_dict = DefinitionDict() + def_dict.add_definitions("(Definition/testdefplaceholder,(Acceleration/#,Item/TestDef2,Red))", + self.hed_schema) + self.assertEqual(len(def_dict.issues), 1) + self.assertEqual(len(def_dict.defs), 0) + + # Good input string + def_dict2 = DefinitionDict() + def_dict2.add_definitions("(Definition/testdefplaceholder/#,(Acceleration/#,Item/TestDef2, Red))", + self.hed_schema) + self.assertEqual(len(def_dict2.issues), 0) + self.assertEqual(len(def_dict2.defs), 1) + if __name__ == '__main__': unittest.main() diff --git a/tests/models/test_definition_entry.py b/tests/models/test_definition_entry.py index ead69c87b..5a7918240 100644 --- a/tests/models/test_definition_entry.py +++ b/tests/models/test_definition_entry.py @@ -1,11 +1,77 @@ import unittest +import os +from hed.schema import load_schema_version +from hed.models.definition_entry import DefinitionEntry +from hed.models.hed_string import HedString class Test(unittest.TestCase): @classmethod def setUpClass(cls): - pass + cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + hed_xml_file = os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0t.xml") + hed_schema = load_schema_version("8.4.0") + cls.contents1 = HedString("(Sensory-event)", hed_schema) + cls.contents2 = HedString("(Agent-action)", hed_schema) + cls.contents3 = HedString("(Sensory-event)", hed_schema) + cls.hed_schema = hed_schema + + def test_definition_entry_init(self): + """Test basic initialization of DefinitionEntry.""" + name = "TestDef" + takes_value = True + source_context = [{"line": 1}] + + entry = DefinitionEntry(name, self.contents1, True, source_context) + self.assertEqual(entry.name, name.casefold()) + self.assertIsNotNone(entry.contents) + self.assertEqual(entry.takes_value, takes_value) + self.assertEqual(entry.source_context, source_context) + self.assertEqual(str(entry), str(self.contents1)) + + def test_none_contents(self): + """Test with None contents.""" + entry = DefinitionEntry("TestDef", None, False, None) + self.assertIsNone(entry.contents) + self.assertFalse(entry.takes_value) + self.assertEqual(str(entry), "None") + + def test_eq_method(self): + """Test __eq__ method""" + name = "TestDef" + takes_value = True + source_context1 = [{"line": 1}] + source_context2 = [{"line": 2}] # Different source context + + entry1 = DefinitionEntry(name, self.contents1, takes_value, source_context1) + entry2 = DefinitionEntry(name, self.contents1, takes_value, source_context2) + entry3 = DefinitionEntry(name, self.contents3, takes_value, source_context2) + entry4 = DefinitionEntry("TESTDEF", self.contents3, takes_value, source_context1) + # Should be equal despite different source contexts + self.assertEqual(entry1, entry2) + self.assertTrue(entry1 == entry2) + # Should be equal as contents are the same + self.assertEqual(entry1, entry3) + self.assertTrue(entry1 == entry3) + # Should be equal with different cased names + self.assertEqual(entry1, entry4) + self.assertTrue(entry1 == entry4) + + # Not equal with different contents + entry5 = DefinitionEntry(name, self.contents2, takes_value, None) + self.assertNotEqual(entry1, entry5) + self.assertFalse(entry1 == entry5) + + # Not equal with different takes value + entry6 = DefinitionEntry(name, self.contents2, True, None) + self.assertNotEqual(entry1, entry6) + self.assertFalse(entry1 == entry6) + + # Not equal with different contents + entry7 = DefinitionEntry(name, None, False, None) + self.assertNotEqual(entry1, entry7) + self.assertFalse(entry1 == entry7) if __name__ == '__main__': diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py index a2dc928ba..69de93401 100644 --- a/tests/models/test_sidecar.py +++ b/tests/models/test_sidecar.py @@ -6,7 +6,7 @@ from hed.errors import HedFileError, ValidationErrors from hed.models import ColumnMetadata, HedString, Sidecar from hed import schema -from hed.models import DefinitionDict +from hed.models import DefinitionDict, DefinitionEntry from hed.errors import ErrorHandler @@ -112,11 +112,14 @@ def test_validate_column_group(self): def test_duplicate_def(self): sidecar = self.json_def_sidecar - + # If external defs are the same, no error duplicate_dict = sidecar.extract_definitions(hed_schema=self.hed_schema) issues = sidecar.validate(self.hed_schema, extra_def_dicts=duplicate_dict, error_handler=ErrorHandler(False)) - self.assertEqual(len(issues), 5) - self.assertTrue(issues[0]['code'], ValidationErrors.DEFINITION_INVALID) + self.assertEqual(len(issues), 0) + test_dict = {"jsonfiledef3": DefinitionEntry("jsonfiledef3", None, False, None)} + issues2 = sidecar.validate(self.hed_schema, extra_def_dicts=test_dict, error_handler=ErrorHandler(False)) + self.assertEqual(len(issues2), 1) + self.assertTrue(issues2[0]['code'], ValidationErrors.DEFINITION_INVALID) def test_save_load(self): sidecar = Sidecar(self.json_def_filename) diff --git a/tests/validator/test_def_validator.py b/tests/validator/test_def_validator.py index 73a890737..9074dbcb7 100644 --- a/tests/validator/test_def_validator.py +++ b/tests/validator/test_def_validator.py @@ -87,14 +87,15 @@ def test_duplicate_def(self): error_handler.push_error_context(ErrorContext.ROW, 5) def_dict.check_for_definitions(def_string, error_handler=error_handler) self.assertEqual(len(def_dict.issues), 0) - - def_validator = DefValidator([def_dict, def_dict]) - self.assertEqual(len(def_validator.issues), 1) - self.assertTrue('ec_row' in def_validator.issues[0]) - - def_dict = DefinitionDict([def_dict, def_dict, def_dict]) - self.assertEqual(len(def_dict.issues), 2) - self.assertTrue('ec_row' in def_dict.issues[0]) + def_dict2 = DefinitionDict() + def_dict2.add_definitions("(Definition/testdefplaceholder/#,(Acceleration/#,Item/TestDef2, Red))", self.hed_schema) + self.assertEqual(len(def_dict2.issues), 0) + def_dict3 = DefinitionDict([def_dict, def_dict2]) + self.assertEqual(len(def_dict3.issues), 1) + + # + # def_validator = DefValidator([def_dict, def_dict2]) + # self.assertEqual(len(def_validator.issues), 1) class TestDefErrors(unittest.TestCase): From aab19bcfbaa50e5af89a86bea0f44e800c0ccffc Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 15 Sep 2025 09:48:40 -0500 Subject: [PATCH 2/2] Updated the license format for pyproject.toml --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d770a6972..9ee0d2fcc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ authors = [ { name = "Owen Winterberg" }, { name = "Kay Robbins", email = "Kay.Robbins@utsa.edu" }, ] -license = "MIT" +license = {text = "MIT"} keywords = [] # Add keywords here if any classifiers = [ "Programming Language :: Python :: 3", @@ -24,7 +24,6 @@ classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", ] requires-python = ">=3.9"