hed-standard · IanCa · Oct 26, 2023 · Oct 25, 2023
diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
@@ -321,7 +321,7 @@ def def_error_no_takes_value(def_name, placeholder_tag):
 
 @hed_tag_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID)
 def def_error_no_takes_value(tag, def_name):
-    return f"Tag '{str(tag)}' in Definition '{def_name}' has has a tag with the unique or required attribute."
+    return f"Tag '{str(tag)}' in Definition '{def_name}' has the unique or required attribute."
 
 
 @hed_tag_error(DefinitionErrors.BAD_DEFINITION_LOCATION, actual_code=ValidationErrors.DEFINITION_INVALID)

diff --git a/hed/models/column_metadata.py b/hed/models/column_metadata.py
@@ -1,6 +1,7 @@
 from enum import Enum
 from hed.errors.error_types import SidecarErrors
 import pandas as pd
+import copy
 
 
 class ColumnType(Enum):
@@ -102,13 +103,15 @@ def set_hed_strings(self, new_strings):
         return True
 
     @staticmethod
-    def _detect_column_type(dict_for_entry):
+    def _detect_column_type(dict_for_entry, basic_validation=True):
         """ Determine the ColumnType of a given json entry.
 
         Parameters:
             dict_for_entry (dict): The loaded json entry a specific column.
                 Generally has a "HED" entry among other optional ones.
-
+            basic_validation (bool): If False, only verify that "HED" exists and is a supported type.
+                                     This lets later validation report precise errors for entries that
+                                     would normally be silently ignored, without crashing.
         Returns:
             ColumnType: The determined type of given column.  Returns None if unknown.
 
@@ -122,14 +125,14 @@ def _detect_column_type(dict_for_entry):
 
         hed_entry = dict_for_entry["HED"]
         if isinstance(hed_entry, dict):
-            if not all(isinstance(entry, str) for entry in hed_entry.values()):
+            if basic_validation and not all(isinstance(entry, str) for entry in hed_entry.values()):
                 return None
             return ColumnType.Categorical
 
         if not isinstance(hed_entry, str):
             return None
 
-        if "#" not in dict_for_entry["HED"]:
+        if basic_validation and "#" not in dict_for_entry["HED"]:
             return None
 
         return ColumnType.Value
@@ -155,3 +158,10 @@ def expected_pound_sign_count(column_type):
         else:
             return 0, None
         return expected_count, error_type
+
+    def _get_unvalidated_data(self):
+        """ Return a deep copy whose column type is re-detected with basic validation turned off. """
+        relaxed_copy = copy.deepcopy(self)
+        source = relaxed_copy.source_dict
+        relaxed_copy.column_type = ColumnMetadata._detect_column_type(dict_for_entry=source, basic_validation=False)
+        return relaxed_copy
diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py
@@ -59,6 +59,7 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
         definition_checks = {}
         for column_data in sidecar:
             column_name = column_data.column_name
+            column_data = column_data._get_unvalidated_data()
             hed_strings = column_data.get_hed_strings()
             error_handler.push_error_context(ErrorContext.SIDECAR_COLUMN_NAME, column_name)
             for key_name, hed_string in hed_strings.items():
@@ -218,7 +219,7 @@ def _validate_column_structure(self, column_name, dict_for_entry, error_handler)
             val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_HED_USED_COLUMN)
             return val_issues
 
-        column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry)
+        column_type = ColumnMetadata._detect_column_type(dict_for_entry=dict_for_entry, basic_validation=False)
         if column_type is None:
             val_issues += error_handler.format_error_with_context(SidecarErrors.UNKNOWN_COLUMN_TYPE,
                                                                   column_name=column_name)
@@ -241,7 +242,11 @@ def _validate_categorical_column(self, column_name, dict_for_entry, error_handle
             error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)
             if not hed_string:
                 val_issues += error_handler.format_error_with_context(SidecarErrors.BLANK_HED_STRING)
-            if key_name in self.reserved_category_values:
+            elif not isinstance(hed_string, str):
+                val_issues += error_handler.format_error_with_context(SidecarErrors.WRONG_HED_DATA_TYPE,
+                                                                      given_type=type(hed_string),
+                                                                      expected_type="str")
+            elif key_name in self.reserved_category_values:
                 val_issues += error_handler.format_error_with_context(SidecarErrors.SIDECAR_NA_USED, column_name)
             error_handler.pop_error_context()
         return val_issues

diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py
@@ -94,7 +94,7 @@ def test__iter__(self):
 
     def test_validate_column_group(self):
         validation_issues = self.errors_sidecar.validate(self.hed_schema)
-        self.assertEqual(len(validation_issues), 5)
+        self.assertEqual(len(validation_issues), 4)
 
         validation_issues2 = self.errors_sidecar_minor.validate(self.hed_schema)
         self.assertEqual(len(validation_issues2), 1)

diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py
@@ -86,6 +86,7 @@ def test_basic_onset_errors(self):
             f"({self.placeholder_label_def_string},Onset)",
             f"({self.placeholder_label_def_string},Offset)",
             f"({self.placeholder_label_def_string},Offset)",
+            f"({self.placeholder_label_def_string},Inset)",
             f"({self.placeholder_label_def_string}, Onset, (Event), (Event))",
             f"({self.placeholder_label_def_string}, Onset, (Event))",
             "(Onset)",
@@ -100,6 +101,7 @@ def test_basic_onset_errors(self):
             0,
             0,
             0,
+            0,
             1,
             1,
             1,
@@ -112,6 +114,7 @@ def test_basic_onset_errors(self):
             [],
             [],
             self.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=0),
+            self.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=0),
             self.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, tag=0,
                               tag_list=['Def/TestDefPlaceholder/2471', 'Onset', '(Event)', '(Event)']),
             [],

diff --git a/tests/validator/test_sidecar_validator.py b/tests/validator/test_sidecar_validator.py
@@ -99,4 +99,73 @@ def test_bad_structure_HED_in_ignored(self):
     '''
         sidecar = Sidecar(io.StringIO(sidecar_with_na_json))
         issues = sidecar.validate(self.hed_schema)
-        self.assertEqual(len(issues), 2)
+        self.assertEqual(len(issues), 2)
+
+    def test_bad_pound_signs(self):
+        sidecar_json = '''
+    {
+      "columnCat": {
+           "HED": {
+             "cat1": "Event",
+             "cat2": "Weight/# g"
+           }
+       },
+       "columnVal": {
+            "HED": "Description/Invalid"
+       },
+       "columnVal2": {
+            "HED": "Description/#, Weight/# g"
+       }
+    }
+    '''
+        # Presumably one issue per entry with a wrong '#' count: cat2 (categorical, has one),
+        # columnVal (single string, has none) and columnVal2 (single string, has two) -- TODO confirm.
+        self.run_test(sidecar_json, expected_number_of_issues=3)
+
+    def test_invalid_list(self):  # "HED" is a list rather than a string or dictionary
+        sidecar_json = '''
+        {
+          "columnInvalidList": {
+               "HED": ["This", "should", "be", "a", "dictionary", "not", "a", "list"]
+           }
+        }
+        '''
+        self.run_test(sidecar_json, expected_number_of_issues=1)
+
+    def test_invalid_number(self):  # "HED" is a bare number rather than a string or dictionary
+        sidecar_json = '''
+        {
+          "columnInvalidNumber": {
+               "HED": 12345
+           }
+        }
+        '''
+        self.run_test(sidecar_json, expected_number_of_issues=1)
+
+    def test_invalid_boolean(self):  # "HED" is a boolean rather than a string or dictionary
+        sidecar_json = '''
+        {
+          "columnInvalidBoolean": {
+               "HED": true
+           }
+        }
+        '''
+        self.run_test(sidecar_json, expected_number_of_issues=1)
+
+    def test_mixed_category(self):  # one category value ("cat2") is a list rather than a string
+        sidecar_json = '''
+        {
+          "columnMixedCategory": {
+               "HED": {
+                 "cat1": "Event",
+                 "cat2": ["Invalid", "data", "type"]
+               }
+           }
+        }
+        '''
+        self.run_test(sidecar_json, expected_number_of_issues=1)
+
+    def run_test(self, sidecar_json, expected_number_of_issues):
+        """ Validate the given sidecar JSON text and assert how many issues are reported. """
+        found_issues = Sidecar(io.StringIO(sidecar_json)).validate(self.hed_schema)
+        self.assertEqual(len(found_issues), expected_number_of_issues)
diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py
@@ -884,6 +884,28 @@ def test_multiple_copies_unique_tags(self):
         self.validator_semantic(test_strings, expected_results, expected_issues, False)
 
 
+class RequiredTagInDefinition(TestHed):
+    """ Definition check for a tag that triggers BAD_PROP_IN_DEFINITION inside a definition. """
+    schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki'
+
+    @staticmethod
+    def string_obj_func(validator):
+        # The passed-in validator is not used; a fresh DefValidator supplies the checking function.
+        from hed.validator import DefValidator
+        return partial(DefValidator().check_for_definitions)
+
+    def test_includes_all_required_tags(self):
+        # tag=3 presumably refers to the inner 'Action' of the definition group -- TODO confirm.
+        definition_string = 'Animal-agent, Action, (Definition/labelWithRequired, (Action))'
+        expected_issue = self.format_error(DefinitionErrors.BAD_PROP_IN_DEFINITION, tag=3,
+                                           def_name='labelWithRequired')
+        # The three dicts handed to validator_semantic share the case name 'complete' as their key.
+        test_strings = {'complete': definition_string}
+        expected_results = {'complete': False}
+        expected_issues = {'complete': expected_issue}
+        self.validator_semantic(test_strings, expected_results, expected_issues, True)
+
+
 class TestHedSpecialUnits(TestHed):
     compute_forms = True
     schema_file = '../data/validator_tests/HED8.0.0_added_tests.mediawiki'