diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 0432bf180..b6b6f726e 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -7,6 +7,17 @@ from hed.errors.error_types import (ValidationErrors, SidecarErrors, ErrorSeverity, DefinitionErrors, TemporalErrors, ColumnErrors, TagQualityErrors) +def get_tag_list_str(tag_list): + """ Return a string representation of a list of tags. + + Parameters: + tag_list (list): A list of tags to convert to a string. + + Returns: + str: A string representation of the list of tags. + """ + return "[" + ', '.join([str(tag) for tag in tag_list]) + "]" + @hed_tag_error(ValidationErrors.UNITS_INVALID) def val_error_invalid_unit(tag, units): @@ -109,9 +120,9 @@ def val_error_duplicate_reserved_tag(tag, group): @hed_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, actual_code=ValidationErrors.TAG_GROUP_ERROR) -def val_error_group_for_reserved_tag(group, group_count): +def val_error_group_for_reserved_tag(group, group_count, tag_list): return (f'The number of non-def-expand subgroups for group "{group}" is {group_count}, "' - f'which does not meet reserved tag requirements."') + f'which does not meet reserved tags {get_tag_list_str(tag_list)} requirements."') @hed_error(ValidationErrors.PARENTHESES_MISMATCH) @@ -338,8 +349,7 @@ def def_error_no_group_tags(def_name): @hed_error(DefinitionErrors.WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.DEFINITION_INVALID) def def_error_wrong_number_groups(def_name, tag_list): - tag_list_strings = [str(tag) for tag in tag_list] - return f"Too many group tags found in definition for {def_name}. Expected 1, found: {tag_list_strings}" + return f"Too many group tags found in definition for {def_name}. 
Expected 1, found: {get_tag_list_str(tag_list)}" @hed_error(DefinitionErrors.WRONG_NUMBER_TAGS, actual_code=ValidationErrors.DEFINITION_INVALID) @@ -350,9 +360,8 @@ def def_error_wrong_number_tags(def_name, tag_list): @hed_error(DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS, actual_code=ValidationErrors.DEFINITION_INVALID) def def_error_wrong_placeholder_count(def_name, expected_count, tag_list): - tag_list_strings = [str(tag) for tag in tag_list] return f"Incorrect number placeholders or placeholder tags found in definition for {def_name}. " + \ - f"Expected {expected_count}, found: {tag_list_strings}" + f"Expected {expected_count}, found: {get_tag_list_str(tag_list)}" @hed_error(DefinitionErrors.DUPLICATE_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID) @@ -485,8 +494,8 @@ def missing_task_role(event_type, string, line): @hed_error(TagQualityErrors.AMBIGUOUS_TAG_GROUPING, default_severity=ErrorSeverity.WARNING, actual_code=TagQualityErrors.AMBIGUOUS_TAG_GROUPING) -def ambiguous_tag_grouping(tags, string, line): - return f"The HED string '{string}' at line {line} has ambiguously grouped tags [{tags}] and needs parentheses." +def ambiguous_tag_grouping(tag_list, string, line): + return f"The HED string '{string}' at line {line} has ambiguously grouped tags {get_tag_list_str(tag_list)} and needs parentheses." 
@hed_error(TagQualityErrors.MISSING_SENSORY_PRESENTATION, default_severity=ErrorSeverity.WARNING, diff --git a/hed/tools/analysis/event_checker.py b/hed/tools/analysis/event_checker.py index 417ccdbed..538481092 100644 --- a/hed/tools/analysis/event_checker.py +++ b/hed/tools/analysis/event_checker.py @@ -1,5 +1,5 @@ from hed.errors.error_types import TagQualityErrors -from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string +from hed.errors import ErrorHandler, ErrorContext, sort_issues from hed import TabularInput from hed.tools import EventManager, HedTagManager @@ -20,17 +20,22 @@ class EventChecker: ALL_ROLES = TASK_ROLES.union(ACTION_ROLES).union(STIMULUS_ROLES) - def __init__(self, hed_obj, line_number, error_handler=None): + def __init__(self, hed_obj, line_number, original_line_number=None, error_handler=None): """ Constructor for the EventChecker class. Parameters: hed_obj (HedString): The HED string to check. line_number (int or None): The index of the HED string in the file. + original_line_number (int or None): The original line number in the file. error_handler (ErrorHandler): The ErrorHandler object to use for error handling. 
""" self.hed_obj = hed_obj self.line_number = line_number + if original_line_number is None: + self.original_line_number = line_number + else: + self.original_line_number = int(original_line_number) if error_handler is None: self.error_handler = ErrorHandler() else: @@ -76,7 +81,7 @@ def _check_grouping(self, hed_groups): event_tags = [tag.short_base_tag for tag in all_tags if tag.short_base_tag in self.EVENT_TAGS] if not event_tags: return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_EVENT_TYPE, - string=str(group), line=self.line_number) + string=str(group), line=self.original_line_number) if len(event_tags) == 1: return self._check_event_group(group, event_tags[0], all_tags) @@ -84,7 +89,7 @@ def _check_grouping(self, hed_groups): # At this point, we know we have multiple event tags in the group. if any(tag.short_base_tag in event_tags for tag in group.tags()): return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.IMPROPER_EVENT_GROUPS, - string=str(group), line=self.line_number, + string=str(group), line=self.original_line_number, event_types =', '.join(event_tags)) hed_groups.extend(group.groups()) return [] @@ -132,7 +137,7 @@ def _check_task_role(self, hed_group, event_tag, all_tags): return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_TASK_ROLE, event_type=event_tag, string=str(hed_group), - line=self.line_number) + line=self.original_line_number) def _check_presentation_modality(self, hed_group, event_tag, all_tags): """ Check that a group with a single event sensory event tag @@ -151,7 +156,7 @@ def _check_presentation_modality(self, hed_group, event_tag, all_tags): if any('sensory-presentation' in tag.tag_terms for tag in all_tags): return [] return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_SENSORY_PRESENTATION, - string=str(hed_group), line=self.line_number) + string=str(hed_group), 
line=self.original_line_number) def _check_action_tags(self, hed_group, event_tag, all_tags): """ Check that a group with a single event tag has at least one task role tag unless it is a non-task event. @@ -170,7 +175,7 @@ def _check_action_tags(self, hed_group, event_tag, all_tags): if any('action' in tag.tag_terms for tag in all_tags): return [] return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_ACTION_TAG, - string=str(hed_group), line=self.line_number) + string=str(hed_group), line=self.original_line_number) class EventsChecker: """ Class to check for event tag quality errors in an event file.""" @@ -191,8 +196,6 @@ def __init__(self, hed_schema, input_data, name=None): self._schema = hed_schema self.input_data = input_data self.name = name - self.group_error_lines = [] - self.missing_error_lines = [] self._initialize() def _initialize(self): @@ -200,6 +203,8 @@ def _initialize(self): event_manager = EventManager(self.input_data, self._schema) tag_man = HedTagManager(event_manager, remove_types=self.REMOVE_TYPES) self.hed_objs = tag_man.get_hed_objs(include_context=False, replace_defs=True) + self.onsets = event_manager.onsets + self.original_index = event_manager.original_index def validate_event_tags(self): """ Verify that the events in the HED strings validly represent events. 
@@ -211,12 +216,13 @@ def validate_event_tags(self): error_handler = ErrorHandler() error_handler.push_error_context(ErrorContext.FILE_NAME, self.name) for index, hed_obj in enumerate(self.hed_objs): - if not hed_obj: + if not hed_obj or hed_obj is None: continue - error_handler.push_error_context(ErrorContext.LINE, index) - event_check = EventChecker(hed_obj, index, error_handler) + error_handler.push_error_context(ErrorContext.LINE, int(self.original_index.iloc[index])) + event_check = EventChecker(hed_obj, index, int(self.original_index.iloc[index]), error_handler) issues += event_check.issues error_handler.pop_error_context() + issues = sort_issues(issues) return issues def insert_issue_details(self, issues): @@ -231,20 +237,17 @@ def insert_issue_details(self, issues): line = issue.get('ec_line') if line is None: continue - lines = self.get_onset_lines(line) - data_info = self.input_data._dataframe.iloc[lines] - details = ["Sources:"] - for index, row in data_info.iterrows(): - details += EventsChecker.get_issue_details(row, index, side_data) + data_info = self.input_data._dataframe.iloc[line] + details = [f"Sources: line:{line} onset:{self.onsets[line]}"] + \ + EventsChecker.get_issue_details(data_info, side_data) issue['details'] = details @staticmethod - def get_issue_details(data_info, line, side_data): + def get_issue_details(data_info, side_data): """ Get the source details for the issue. Parameters: data_info (pd.Series): The row information from the original tsv. - line (list): A list of lines from the original tsv. side_data (pd.Series): The sidecar data. 
Returns: @@ -260,8 +263,7 @@ def get_issue_details(data_info, line, side_data): col_line = f" => sidecar_source:{EventsChecker.get_hed_source(side_data[col].hed_dict, value)}" if not col_line and col != 'HED': continue - col_line = f"\t[line:{line} column_name:{col} column_value:{data_info[col]}]" + col_line - details.append(col_line) + details.append(f"\t[Column_name:{col} Column_value:{data_info[col]}]" + col_line) return details @staticmethod diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 2cbad4bf2..3e73560bd 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -35,6 +35,7 @@ def __init__(self, input_data, hed_schema, extra_defs=None): self.input_data = input_data self.def_dict = input_data.get_def_dict(hed_schema, extra_def_dicts=extra_defs) self.onsets = None # list of onset times or None if not an events file + self.original_index = None # list of original indices of the events self.base = None # list of strings containing the starts of event processes self.context = None # list of strings containing the contexts of event processes self.hed_strings = None # list of HedString objects without the temporal events @@ -62,6 +63,7 @@ def _create_event_list(self, input_data): hed_strings = [HedString(hed_string, self.hed_schema) for hed_string in delay_df.HED] self.onsets = pd.to_numeric(delay_df.onset, errors='coerce') + self.original_index = pd.to_numeric(delay_df.original_index, errors='coerce') self.event_list = [[] for _ in range(len(hed_strings))] onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet. 
for event_index, hed in enumerate(hed_strings): diff --git a/hed/validator/reserved_checker.py b/hed/validator/reserved_checker.py index 0cf226a60..9fa019c26 100644 --- a/hed/validator/reserved_checker.py +++ b/hed/validator/reserved_checker.py @@ -6,7 +6,6 @@ from hed.errors.error_types import ValidationErrors, TemporalErrors from hed.errors.error_reporter import ErrorHandler - class ReservedChecker: _instance = None _lock = Lock() @@ -111,10 +110,10 @@ def check_tag_requirements(self, group, reserved_tags): min_allowed, max_allowed = self.get_group_requirements(reserved_tags) if not math.isinf(max_allowed) and len(other_groups) > max_allowed: return ErrorHandler.format_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, group=group, - group_count=str(len(other_groups))) + group_count=str(len(other_groups)), tag_list=reserved_tags) if group.is_group and not math.isinf(max_allowed) and min_allowed > len(other_groups): return ErrorHandler.format_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, group=group, - group_count=str(len(other_groups))) + group_count=str(len(other_groups)), tag_list=reserved_tags) return [] def get_group_requirements(self, reserved_tags):