diff --git a/hed/errors/__init__.py b/hed/errors/__init__.py index 8bbe1f662..d68ba3e08 100644 --- a/hed/errors/__init__.py +++ b/hed/errors/__init__.py @@ -1,4 +1,4 @@ -from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues +from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings, SidecarErrors, \ ValidationErrors, ColumnErrors from .error_types import ErrorContext, ErrorSeverity diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index e5e51d7c5..da5c06b3d 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -339,6 +339,11 @@ def onset_error_offset_before_onset(tag): return f"Offset tag '{tag}' does not have a matching onset." +@hed_tag_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR) +def onset_error_same_defs_one_row(tag, def_name): + return f"'{tag}' uses name '{def_name}', which was already used at this onset time." + + @hed_tag_error(OnsetErrors.INSET_BEFORE_ONSET, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR) def onset_error_inset_before_onset(tag): return f"Inset tag '{tag}' does not have a matching onset." diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 596882e37..409656235 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -671,3 +671,31 @@ def _create_error_tree(error_dict, parent_element=None, add_link=True): _create_error_tree(value, context_ul, add_link) return parent_element + + +def replace_tag_references(list_or_dict): + """Utility function to remove any references to tags, strings, etc from any type of nested list or dict + + Use this if you want to save out issues to a file. + + If you'd prefer a copy returned, use replace_tag_references(list_or_dict.copy()) + + Parameters: + list_or_dict(list or dict): An arbitrarily nested list/dict structure + """ + if isinstance(list_or_dict, dict): + for key, value in list_or_dict.items(): + if isinstance(value, (dict, list)): + replace_tag_references(value) + elif isinstance(value, (bool, float, int)): + list_or_dict[key] = value + else: + list_or_dict[key] = str(value) + elif isinstance(list_or_dict, list): + for key, value in enumerate(list_or_dict): + if isinstance(value, (dict, list)): + replace_tag_references(value) + elif isinstance(value, (bool, float, int)): + list_or_dict[key] = value + else: + list_or_dict[key] = str(value) diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 1fa43c8b1..7305e7c6c 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -160,6 +160,8 @@ class OnsetErrors: ONSET_TOO_MANY_DEFS = "ONSET_TOO_MANY_DEFS" ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP" INSET_BEFORE_ONSET = "INSET_BEFORE_ONSET" + ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW" + class ColumnErrors: INVALID_COLUMN_REF = "INVALID_COLUMN_REF" diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 0e7190498..12e2d8895 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -105,12 +105,54 @@ def dataframe_a(self): @property def series_a(self): """Return the assembled dataframe as a series - Probably a placeholder name. Returns: - Series: the assembled dataframe with columns merged""" + Series: the assembled dataframe with columns merged + """ return self.combine_dataframe(self.assemble()) + @property + def series_filtered(self): + """Return the assembled dataframe as a series, with rows that have the same onset combined + + Returns: + Series: the assembled dataframe with columns merged, and the rows filtered together + """ + if self.onsets is not None: + indexed_dict = self._indexed_dict_from_onsets(self.onsets.astype(float)) + return self._filter_by_index_list(self.series_a, indexed_dict=indexed_dict) + + @staticmethod + def _indexed_dict_from_onsets(onsets): + current_onset = -1000000.0 + tol = 1e-9 + from collections import defaultdict + indexed_dict = defaultdict(list) + for i, onset in enumerate(onsets): + if abs(onset - current_onset) > tol: + current_onset = onset + indexed_dict[current_onset].append(i) + + return indexed_dict + + @staticmethod + def _filter_by_index_list(original_series, indexed_dict): + new_series = ["n/a"] * len(original_series) # Initialize new_series with "n/a" + + for onset, indices in indexed_dict.items(): + if indices: + first_index = indices[0] # Take the first index of each onset group + # Join the corresponding original series entries and place them at the first index + new_series[first_index] = ",".join([str(original_series[i]) for i in indices]) + + return new_series + + @property + def onsets(self): + """Returns the onset column if it exists""" + if "onset" in self.columns: + return self._dataframe["onset"] + @property def name(self): """ Name of the data. """ diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index fcafcf87b..293c8ad06 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -1,12 +1,15 @@ from hed.models.hed_string import HedString from hed.models.hed_tag import HedTag +from hed.models.hed_group import HedGroup from hed.models.definition_dict import DefinitionDict from hed.errors.error_types import ValidationErrors from hed.errors.error_reporter import ErrorHandler +from hed.models.model_constants import DefTagNames +from hed.errors.error_types import OnsetErrors class DefValidator(DefinitionDict): - """ Handles validating Def/ and Def-expand/. + """ Handles validating Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset """ def __init__(self, def_dicts=None, hed_schema=None): @@ -128,3 +131,71 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator): def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag) return def_issues + + def validate_onset_offset(self, hed_string_obj): + """ Validate onset/offset + + Parameters: + hed_string_obj (HedString): The hed string to check. + + Returns: + list: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names). + """ + onset_issues = [] + for found_onset, found_group in self._find_onset_tags(hed_string_obj): + if not found_onset: + return [] + + def_tags = found_group.find_def_tags() + if not def_tags: + onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, found_onset) + continue + + if len(def_tags) > 1: + onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS, + tag=def_tags[0][0], + tag_list=[tag[0] for tag in def_tags[1:]]) + continue + + # Get all children but def group and onset/offset, then validate #/type of children. + def_tag, def_group, _ = def_tags[0] + if def_group is None: + def_group = def_tag + children = [child for child in found_group.children if + def_group is not child and found_onset is not child] + max_children = 1 + if found_onset.short_base_tag == DefTagNames.OFFSET_ORG_KEY: + max_children = 0 + if len(children) > max_children: + onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, + def_tag, + found_group.children) + continue + + if children: + # Make this a loop if max_children can be > 1 + child = children[0] + if not isinstance(child, HedGroup): + onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, + child, + def_tag) + + # At this point we have either an onset or offset tag and it's name + onset_issues += self._handle_onset_or_offset(def_tag) + + return onset_issues + + def _find_onset_tags(self, hed_string_obj): + return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS) + + def _handle_onset_or_offset(self, def_tag): + def_name, _, placeholder = def_tag.extension.partition('/') + + def_entry = self.defs.get(def_name.lower()) + if def_entry is None: + return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag) + if bool(def_entry.takes_value) != bool(placeholder): + return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag, + has_placeholder=bool(def_entry.takes_value)) + + return [] diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index a3293dc4f..b955d5e41 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -12,19 +12,17 @@ from hed.models import HedTag from hed.validator.tag_validator import TagValidator from hed.validator.def_validator import DefValidator -from hed.validator.onset_validator import OnsetValidator class HedValidator: """ Top level validation of HED strings. """ - def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, definitions_allowed=False): + def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False): """ Constructor for the HedValidator class. Parameters: hed_schema (HedSchema or HedSchemaGroup): HedSchema object to use for validation. def_dicts(DefinitionDict or list or dict): the def dicts to use for validation - run_full_onset_checks(bool): If True, check for matching onset/offset tags definitions_allowed(bool): If False, flag definitions found as errors """ super().__init__() @@ -33,8 +31,6 @@ def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, defin self._tag_validator = TagValidator(hed_schema=self._hed_schema) self._def_validator = DefValidator(def_dicts, hed_schema) - self._onset_validator = OnsetValidator(def_dict=self._def_validator, - run_full_onset_checks=run_full_onset_checks) self._definitions_allowed = definitions_allowed def validate(self, hed_string, allow_placeholders, error_handler=None): @@ -80,7 +76,7 @@ def run_full_string_checks(self, hed_string): issues = [] issues += self._validate_tags_in_hed_string(hed_string) issues += self._validate_groups_in_hed_string(hed_string) - issues += self._onset_validator.validate_onset_offset(hed_string) + issues += self._def_validator.validate_onset_offset(hed_string) return issues def _validate_groups_in_hed_string(self, hed_string_obj): diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py index b1d928347..94be9d7ef 100644 --- a/hed/validator/onset_validator.py +++ b/hed/validator/onset_validator.py @@ -7,13 +7,11 @@ class OnsetValidator: """ Validates onset/offset pairs. """ - def __init__(self, def_dict, run_full_onset_checks=True): - self._defs = def_dict + def __init__(self): self._onsets = {} - self._run_full_onset_checks = run_full_onset_checks - def validate_onset_offset(self, hed_string_obj): - """ Validate onset/offset + def validate_temporal_relations(self, hed_string_obj): + """ Validate onset/offset/inset tag relations Parameters: hed_string_obj (HedString): The hed string to check. @@ -22,76 +20,46 @@ def validate_onset_offset(self, hed_string_obj): list: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names). """ onset_issues = [] - for found_onset, found_group in self._find_onset_tags(hed_string_obj): - if not found_onset: + used_def_names = set() + for temporal_tag, temporal_group in self._find_temporal_tags(hed_string_obj): + if not temporal_tag: return [] - def_tags = found_group.find_def_tags() + def_tags = temporal_group.find_def_tags(include_groups=0) if not def_tags: - onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, found_onset) continue - if len(def_tags) > 1: - onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS, - tag=def_tags[0][0], - tag_list=[tag[0] for tag in def_tags[1:]]) + def_tag = def_tags[0] + def_name = def_tag.extension + if def_name.lower() in used_def_names: + onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, tag=temporal_tag, + def_name=def_name) continue - # Get all children but def group and onset/offset, then validate #/type of children. - def_tag, def_group, _ = def_tags[0] - if def_group is None: - def_group = def_tag - children = [child for child in found_group.children if - def_group is not child and found_onset is not child] - max_children = 1 - if found_onset.short_base_tag == DefTagNames.OFFSET_ORG_KEY: - max_children = 0 - if len(children) > max_children: - onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, - def_tag, - found_group.children) - continue - - if children: - # Make this a loop if max_children can be > 1 - child = children[0] - if not isinstance(child, HedGroup): - onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, - child, - def_tag) + used_def_names.add(def_tag.extension.lower()) # At this point we have either an onset or offset tag and it's name - onset_issues += self._handle_onset_or_offset(def_tag, found_onset) + onset_issues += self._handle_onset_or_offset(def_tag, temporal_tag) return onset_issues - def _find_onset_tags(self, hed_string_obj): + def _find_temporal_tags(self, hed_string_obj): return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS) def _handle_onset_or_offset(self, def_tag, onset_offset_tag): is_onset = onset_offset_tag.short_base_tag == DefTagNames.ONSET_ORG_KEY full_def_name = def_tag.extension - def_name, _, placeholder = def_tag.extension.partition('/') - - def_entry = self._defs.get(def_name) - if def_entry is None: - return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag) - if bool(def_entry.takes_value) != bool(placeholder): - return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag, - has_placeholder=bool(def_entry.takes_value)) - - if self._run_full_onset_checks: - if is_onset: - # onset can never fail as it implies an offset - self._onsets[full_def_name.lower()] = full_def_name - else: - is_offset = onset_offset_tag.short_base_tag == DefTagNames.OFFSET_ORG_KEY - if full_def_name.lower() not in self._onsets: - if is_offset: - return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=def_tag) - else: - return ErrorHandler.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=def_tag) - elif is_offset: - del self._onsets[full_def_name.lower()] + if is_onset: + # onset can never fail as it implies an offset + self._onsets[full_def_name.lower()] = full_def_name + else: + is_offset = onset_offset_tag.short_base_tag == DefTagNames.OFFSET_ORG_KEY + if full_def_name.lower() not in self._onsets: + if is_offset: + return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=def_tag) + else: + return ErrorHandler.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=def_tag) + elif is_offset: + del self._onsets[full_def_name.lower()] return [] diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index 9e6f222fd..2a6f22099 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -51,7 +51,6 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None) sidecar_def_dict = sidecar.get_def_dict(hed_schema=self._schema, extra_def_dicts=extra_def_dicts) hed_validator = HedValidator(self._schema, def_dicts=sidecar_def_dict, - run_full_onset_checks=False, definitions_allowed=True) issues += sidecar._extract_definition_issues diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 025aa54d4..ebd647953 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -5,6 +5,8 @@ from hed.models import ColumnType from hed import HedString from hed.errors.error_reporter import sort_issues, check_for_any_errors +from hed.validator.onset_validator import OnsetValidator +from hed.validator.hed_validator import HedValidator PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: " @@ -19,6 +21,7 @@ def __init__(self, hed_schema): """ self._schema = hed_schema self._hed_validator = None + self._onset_validator = None def validate(self, data, def_dicts=None, name=None, error_handler=None): """ @@ -33,29 +36,31 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): Returns: issues (list of dict): A list of issues for hed string """ - from hed.validator import HedValidator + issues = [] if error_handler is None: error_handler = ErrorHandler() error_handler.push_error_context(ErrorContext.FILE_NAME, name) self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts) - # Check the structure of the input data, if it's a BaseInput + self._onset_validator = OnsetValidator() + onset_filtered = None if isinstance(data, BaseInput): issues += self._validate_column_structure(data, error_handler) + onset_filtered = data.series_filtered data = data.dataframe_a # Check the rows of the input data - issues += self._run_checks(data, error_handler) + issues += self._run_checks(data, onset_filtered, error_handler=error_handler) error_handler.pop_error_context() issues = sort_issues(issues) return issues - def _run_checks(self, data, error_handler): + def _run_checks(self, hed_df, onset_filtered, error_handler): issues = [] - columns = list(data.columns) - for row_number, text_file_row in enumerate(data.itertuples(index=False)): + columns = list(hed_df.columns) + for row_number, text_file_row in enumerate(hed_df.itertuples(index=False)): error_handler.push_error_context(ErrorContext.ROW, row_number) row_strings = [] new_column_issues = [] @@ -76,12 +81,19 @@ def _run_checks(self, data, error_handler): issues += new_column_issues if check_for_any_errors(new_column_issues): + error_handler.pop_error_context() continue + + row_string = None + if onset_filtered is not None: + row_string = HedString(onset_filtered[row_number], self._schema, self._hed_validator._def_validator) elif row_strings: row_string = HedString.from_hed_strings(row_strings) + + if row_string: error_handler.push_error_context(ErrorContext.HED_STRING, row_string) new_column_issues = self._hed_validator.run_full_string_checks(row_string) - + new_column_issues += self._onset_validator.validate_temporal_relations(row_string) error_handler.add_context_and_filter(new_column_issues) error_handler.pop_error_context() issues += new_column_issues diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index ac817fa81..972d53d4d 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -57,7 +57,6 @@ skip_tests = { "VERSION_DEPRECATED": "Not applicable", - "onset-offset-inset-error-duplicated-onset-or-offset": "TBD how we implement this", "tag-extension-invalid-bad-node-name": "Part of character invalid checking/didn't get to it yet", } @@ -139,7 +138,7 @@ def report_result(self, expected_result, issues, error_code, description, name, self.fail_count.append(name) def _run_single_string_test(self, info, schema, def_dict, error_code, description, name, error_handler): - string_validator = HedValidator(hed_schema=schema, def_dicts=def_dict, run_full_onset_checks=False) + string_validator = HedValidator(hed_schema=schema, def_dicts=def_dict) for result, tests in info.items(): for test in tests: test_string = HedString(test, schema) diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py index c5f850aa9..d4482314c 100644 --- a/tests/errors/test_error_reporter.py +++ b/tests/errors/test_error_reporter.py @@ -1,6 +1,6 @@ import unittest from hed.errors import ErrorHandler, ErrorContext, ErrorSeverity, ValidationErrors, SchemaWarnings, \ - get_printable_issue_string, sort_issues + get_printable_issue_string, sort_issues, replace_tag_references from hed import HedString from hed import load_schema_version @@ -9,6 +9,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): cls.error_handler = ErrorHandler() + cls._schema = load_schema_version() pass def test_push_error_context(self): @@ -142,3 +143,20 @@ def test_sort_issues(self): self.assertEqual(reversed_issues[2][ErrorContext.CUSTOM_TITLE], 'issue3') self.assertEqual(reversed_issues[3][ErrorContext.CUSTOM_TITLE], 'issue2') self.assertEqual(reversed_issues[4][ErrorContext.CUSTOM_TITLE], 'issue1') + + + def test_replace_tag_references(self): + # Test with mixed data types and HedString in a nested dict + nested_dict = {'a': HedString('Hed1', self._schema), 'b': {'c': 2, 'd': [3, {'e': HedString('Hed2', self._schema)}]}, 'f': [5, 6]} + replace_tag_references(nested_dict) + self.assertEqual(nested_dict, {'a': 'Hed1', 'b': {'c': 2, 'd': [3, {'e': 'Hed2'}]}, 'f': [5, 6]}) + + # Test with mixed data types and HedString in a nested list + nested_list = [HedString('Hed1', self._schema), {'a': 2, 'b': [3, {'c': HedString('Hed2', self._schema)}]}] + replace_tag_references(nested_list) + self.assertEqual(nested_list, ['Hed1', {'a': 2, 'b': [3, {'c': 'Hed2'}]}]) + + # Test with mixed data types and HedString in a list within a dict + mixed = {'a': HedString('Hed1', self._schema), 'b': [2, 3, {'c': HedString('Hed2', self._schema)}, 4]} + replace_tag_references(mixed) + self.assertEqual(mixed, {'a': 'Hed1', 'b': [2, 3, {'c': 'Hed2'}, 4]}) diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 5f8b2bbab..f5b381eb3 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -271,3 +271,58 @@ def test_combine_dataframe_with_mixed_values(self): result = BaseInput.combine_dataframe(loaded_df) expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) self.assertTrue(result.equals(expected)) + + +class TestOnsetDict(unittest.TestCase): + def test_empty_and_single_onset(self): + self.assertEqual(BaseInput._indexed_dict_from_onsets([]), {}) + self.assertEqual(BaseInput._indexed_dict_from_onsets([3.5]), {3.5: [0]}) + + def test_identical_and_approx_equal_onsets(self): + self.assertEqual(BaseInput._indexed_dict_from_onsets([3.5, 3.5]), {3.5: [0, 1]}) + self.assertEqual(BaseInput._indexed_dict_from_onsets([3.5, 3.500000001]), {3.5: [0], 3.500000001: [1]}) + self.assertEqual(BaseInput._indexed_dict_from_onsets([3.5, 3.5000000000001]), {3.5: [0, 1]}) + + def test_distinct_and_mixed_onsets(self): + self.assertEqual(BaseInput._indexed_dict_from_onsets([3.5, 4.0, 4.4]), {3.5: [0], 4.0: [1], 4.4: [2]}) + self.assertEqual(BaseInput._indexed_dict_from_onsets([3.5, 3.5, 4.0, 4.4]), {3.5: [0, 1], 4.0: [2], 4.4: [3]}) + self.assertEqual(BaseInput._indexed_dict_from_onsets([4.0, 3.5, 4.4, 4.4]), {4.0: [0], 3.5: [1], 4.4: [2, 3]}) + + def test_complex_onsets(self): + # Negative, zero, and positive onsets + self.assertEqual(BaseInput._indexed_dict_from_onsets([-1.0, 0.0, 1.0]), {-1.0: [0], 0.0: [1], 1.0: [2]}) + + # Very close but distinct onsets + self.assertEqual(BaseInput._indexed_dict_from_onsets([1.0, 1.0 + 1e-8, 1.0 + 2e-8]), + {1.0: [0], 1.0 + 1e-8: [1], 1.0 + 2e-8: [2]}) + # Very close + self.assertEqual(BaseInput._indexed_dict_from_onsets([1.0, 1.0 + 1e-10, 1.0 + 2e-10]), + {1.0: [0, 1, 2]}) + + # Mixed scenario + self.assertEqual(BaseInput._indexed_dict_from_onsets([3.5, 3.5, 4.0, 4.4, 4.4, -1.0]), + {3.5: [0, 1], 4.0: [2], 4.4: [3, 4], -1.0: [5]}) + + def test_empty_and_single_item_series(self): + self.assertEqual(BaseInput._filter_by_index_list([], {}), []) + self.assertEqual(BaseInput._filter_by_index_list(["apple"], {0: [0]}), ["apple"]) + + def test_two_item_series_with_same_onset(self): + self.assertEqual(BaseInput._filter_by_index_list(["apple", "orange"], {0: [0, 1]}), ["apple,orange", "n/a"]) + + def test_multiple_item_series(self): + original = ["apple", "orange", "banana", "mango"] + indexed_dict = {0: [0, 1], 1: [2], 2: [3]} + self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["apple,orange", "n/a", "banana", "mango"]) + + def test_complex_scenarios(self): + # Test with negative, zero and positive onsets + original = ["negative", "zero", "positive"] + indexed_dict = {-1: [0], 0: [1], 1: [2]} + self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["negative", "zero", "positive"]) + + # Test with more complex indexed_dict + original = ["apple", "orange", "banana", "mango", "grape"] + indexed_dict = {0: [0, 1], 1: [2], 2: [3, 4]} + self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), + ["apple,orange", "n/a", "banana", "mango,grape", "n/a"]) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py index 9ae1ef776..97b87df83 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py @@ -5,6 +5,7 @@ from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.operations.summarize_hed_validation_op import SummarizeHedValidationOp, \ HedValidationSummary +from hed.errors import error_reporter class Test(unittest.TestCase): @@ -74,6 +75,7 @@ def test_get_summary_details(self): sum_context = dispatch.summary_dicts[sum_op.summary_name] sum_obj1 = sum_context.get_summary_details() self.assertIsInstance(sum_obj1, dict) + error_reporter.replace_tag_references(sum_obj1) json_str1 = json.dumps(sum_obj1, indent=4) self.assertIsInstance(json_str1, str) json_obj1 = json.loads(json_str1) @@ -81,6 +83,7 @@ def test_get_summary_details(self): sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path) sum_context2 = dispatch.summary_dicts[sum_op.summary_name] sum_obj2 = sum_context2.get_summary_details() + error_reporter.replace_tag_references(sum_obj2) json_str2 = json.dumps(sum_obj2, indent=4) self.assertIsInstance(json_str2, str) sum_obj3 = sum_context2.get_summary_details(include_individual=False) diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py index 5e20d8ad4..a594cf61c 100644 --- a/tests/validator/test_onset_validator.py +++ b/tests/validator/test_onset_validator.py @@ -5,7 +5,7 @@ from hed.errors import ErrorHandler, OnsetErrors, ErrorContext, ValidationErrors from hed.models import HedString, DefinitionDict from hed import schema -from hed.validator import HedValidator, OnsetValidator +from hed.validator import HedValidator, OnsetValidator, DefValidator from tests.validator.test_tag_validator_base import TestHedBase @@ -39,27 +39,32 @@ def setUpClass(cls): def_string = HedString(cls.definition_string, hed_schema=cls.hed_schema) cls.def_dict_both.check_for_definitions(def_string) - def _test_issues_base(self, test_strings, test_issues, test_context, placeholder_def_only): if placeholder_def_only: - validator = OnsetValidator(self.def_dict_placeholder) + onset_validator = OnsetValidator() + def_validator = DefValidator(self.def_dict_placeholder) else: - validator = OnsetValidator(self.def_dict_both) + onset_validator = OnsetValidator() + def_validator = DefValidator(self.def_dict_both) + for string, expected_params, context in zip(test_strings, test_issues, test_context): test_string = HedString(string, self.hed_schema) error_handler = ErrorHandler() error_handler.push_error_context(ErrorContext.HED_STRING, test_string) onset_issues = [] - onset_issues += validator.validate_onset_offset(test_string) + onset_issues += def_validator.validate_onset_offset(test_string) + if not onset_issues: + onset_issues += onset_validator.validate_temporal_relations(test_string) error_handler.add_context_and_filter(onset_issues) test_string.shrink_defs() issues = self.format_errors_fully(error_handler, hed_string=test_string, params=expected_params) # print(str(onset_issues)) # print(str(issues)) + # print(onset_validator._onsets) error_handler.pop_error_context() - self.assertEqual(len(validator._onsets), context) + self.assertEqual(len(onset_validator._onsets), context) self.assertCountEqual(onset_issues, issues) def _test_issues_no_context(self, test_strings, test_issues): @@ -299,7 +304,7 @@ def test_onset_two_in_one_line(self): [], [], [], - [] + self.format_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, tag=3, def_name="TestDefPlaceholder/2471") ] self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False)