Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@
from hed.errors.error_types import (ValidationErrors, SidecarErrors, ErrorSeverity, DefinitionErrors,
TemporalErrors, ColumnErrors, TagQualityErrors)

def get_tag_list_str(tag_list):
    """ Format a list of tags as a bracketed, comma-separated string.

    Parameters:
        tag_list (list): The tags to render; each element is converted with str().

    Returns:
        str: The tag strings joined by ", " and enclosed in square brackets.
    """
    joined_tags = ', '.join(map(str, tag_list))
    return f"[{joined_tags}]"


@hed_tag_error(ValidationErrors.UNITS_INVALID)
def val_error_invalid_unit(tag, units):
Expand Down Expand Up @@ -109,9 +120,9 @@ def val_error_duplicate_reserved_tag(tag, group):


@hed_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, actual_code=ValidationErrors.TAG_GROUP_ERROR)
def val_error_group_for_reserved_tag(group, group_count):
def val_error_group_for_reserved_tag(group, group_count, tag_list):
    """ Build the message for a group whose subgroup count violates reserved tag requirements.

    Parameters:
        group: The offending group; interpolated into the message as-is.
        group_count: The number of non-def-expand subgroups found in the group.
        tag_list (list): The reserved tags whose requirements were not met.

    Returns:
        str: The formatted error message.
    """
    # Only the group is quoted; the trailing clause must not be wrapped in double quotes,
    # otherwise the message renders as: ... is 2, "which does not meet ... requirements."
    return (f'The number of non-def-expand subgroups for group "{group}" is {group_count}, '
            f'which does not meet reserved tags {get_tag_list_str(tag_list)} requirements.')


@hed_error(ValidationErrors.PARENTHESES_MISMATCH)
Expand Down Expand Up @@ -338,8 +349,7 @@ def def_error_no_group_tags(def_name):

@hed_error(DefinitionErrors.WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.DEFINITION_INVALID)
def def_error_wrong_number_groups(def_name, tag_list):
tag_list_strings = [str(tag) for tag in tag_list]
return f"Too many group tags found in definition for {def_name}. Expected 1, found: {tag_list_strings}"
return f"Too many group tags found in definition for {def_name}. Expected 1, found: {get_tag_list_str(tag_list)}"


@hed_error(DefinitionErrors.WRONG_NUMBER_TAGS, actual_code=ValidationErrors.DEFINITION_INVALID)
Expand All @@ -350,9 +360,8 @@ def def_error_wrong_number_tags(def_name, tag_list):

@hed_error(DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS, actual_code=ValidationErrors.DEFINITION_INVALID)
def def_error_wrong_placeholder_count(def_name, expected_count, tag_list):
tag_list_strings = [str(tag) for tag in tag_list]
return f"Incorrect number placeholders or placeholder tags found in definition for {def_name}. " + \
f"Expected {expected_count}, found: {tag_list_strings}"
f"Expected {expected_count}, found: {get_tag_list_str(tag_list)}"


@hed_error(DefinitionErrors.DUPLICATE_DEFINITION, actual_code=ValidationErrors.DEFINITION_INVALID)
Expand Down Expand Up @@ -485,8 +494,8 @@ def missing_task_role(event_type, string, line):

@hed_error(TagQualityErrors.AMBIGUOUS_TAG_GROUPING, default_severity=ErrorSeverity.WARNING,
actual_code=TagQualityErrors.AMBIGUOUS_TAG_GROUPING)
def ambiguous_tag_grouping(tags, string, line):
return f"The HED string '{string}' at line {line} has ambiguously grouped tags [{tags}] and needs parentheses."
def ambiguous_tag_grouping(tag_list, string, line):
    """ Build the warning message for a HED string whose tags are ambiguously grouped.

    Parameters:
        tag_list (list): The tags that are ambiguously grouped.
        string (str): The HED string being reported.
        line (int): The line number reported in the message.

    Returns:
        str: The formatted warning message.
    """
    # A space must follow the bracketed tag list; without it the text reads "...]and needs".
    return (f"The HED string '{string}' at line {line} has ambiguously grouped tags "
            f"{get_tag_list_str(tag_list)} and needs parentheses.")


@hed_error(TagQualityErrors.MISSING_SENSORY_PRESENTATION, default_severity=ErrorSeverity.WARNING,
Expand Down
44 changes: 23 additions & 21 deletions hed/tools/analysis/event_checker.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from hed.errors.error_types import TagQualityErrors
from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string
from hed.errors import ErrorHandler, ErrorContext, sort_issues
from hed import TabularInput
from hed.tools import EventManager, HedTagManager

Expand All @@ -20,17 +20,22 @@ class EventChecker:

ALL_ROLES = TASK_ROLES.union(ACTION_ROLES).union(STIMULUS_ROLES)

def __init__(self, hed_obj, line_number, error_handler=None):
def __init__(self, hed_obj, line_number, original_line_number=None, error_handler=None):
""" Constructor for the EventChecker class.

Parameters:
hed_obj (HedString): The HED string to check.
line_number (int or None): The index of the HED string in the file.
original_line_number (int or None): The original line number in the file.
error_handler (ErrorHandler): The ErrorHandler object to use for error handling.

"""
self.hed_obj = hed_obj
self.line_number = line_number
if original_line_number is None:
self.original_line_number = line_number
else:
self.original_line_number = int(original_line_number)
if error_handler is None:
self.error_handler = ErrorHandler()
else:
Expand Down Expand Up @@ -76,15 +81,15 @@ def _check_grouping(self, hed_groups):
event_tags = [tag.short_base_tag for tag in all_tags if tag.short_base_tag in self.EVENT_TAGS]
if not event_tags:
return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_EVENT_TYPE,
string=str(group), line=self.line_number)
string=str(group), line=self.original_line_number)

if len(event_tags) == 1:
return self._check_event_group(group, event_tags[0], all_tags)

# At this point, we know we have multiple event tags in the group.
if any(tag.short_base_tag in event_tags for tag in group.tags()):
return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.IMPROPER_EVENT_GROUPS,
string=str(group), line=self.line_number,
string=str(group), line=self.original_line_number,
event_types =', '.join(event_tags))
hed_groups.extend(group.groups())
return []
Expand Down Expand Up @@ -132,7 +137,7 @@ def _check_task_role(self, hed_group, event_tag, all_tags):

return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_TASK_ROLE,
event_type=event_tag, string=str(hed_group),
line=self.line_number)
line=self.original_line_number)

def _check_presentation_modality(self, hed_group, event_tag, all_tags):
""" Check that a group with a single event sensory event tag
Expand All @@ -151,7 +156,7 @@ def _check_presentation_modality(self, hed_group, event_tag, all_tags):
if any('sensory-presentation' in tag.tag_terms for tag in all_tags):
return []
return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_SENSORY_PRESENTATION,
string=str(hed_group), line=self.line_number)
string=str(hed_group), line=self.original_line_number)

def _check_action_tags(self, hed_group, event_tag, all_tags):
""" Check that a group with a single event tag has at least one task role tag unless it is a non-task event.
Expand All @@ -170,7 +175,7 @@ def _check_action_tags(self, hed_group, event_tag, all_tags):
if any('action' in tag.tag_terms for tag in all_tags):
return []
return ErrorHandler.format_error_with_context(self.error_handler, TagQualityErrors.MISSING_ACTION_TAG,
string=str(hed_group), line=self.line_number)
string=str(hed_group), line=self.original_line_number)

class EventsChecker:
""" Class to check for event tag quality errors in an event file."""
Expand All @@ -191,15 +196,15 @@ def __init__(self, hed_schema, input_data, name=None):
self._schema = hed_schema
self.input_data = input_data
self.name = name
self.group_error_lines = []
self.missing_error_lines = []
self._initialize()

def _initialize(self):
    """ Populate the per-event data used by the checker.

    Creates an EventManager from the input data and schema, wraps it in a HedTagManager
    (passing REMOVE_TYPES), and stores the resulting HED objects together with the
    event manager's onsets and original indices on this instance.
    """
    manager = EventManager(self.input_data, self._schema)
    self.hed_objs = HedTagManager(manager, remove_types=self.REMOVE_TYPES).get_hed_objs(
        include_context=False, replace_defs=True)
    self.onsets = manager.onsets
    self.original_index = manager.original_index

def validate_event_tags(self):
""" Verify that the events in the HED strings validly represent events.
Expand All @@ -211,12 +216,13 @@ def validate_event_tags(self):
error_handler = ErrorHandler()
error_handler.push_error_context(ErrorContext.FILE_NAME, self.name)
for index, hed_obj in enumerate(self.hed_objs):
if not hed_obj:
if not hed_obj or hed_obj is None:
continue
error_handler.push_error_context(ErrorContext.LINE, index)
event_check = EventChecker(hed_obj, index, error_handler)
error_handler.push_error_context(ErrorContext.LINE, int(self.original_index.iloc[index]))
event_check = EventChecker(hed_obj, index, int(self.original_index.iloc[index]), error_handler)
issues += event_check.issues
error_handler.pop_error_context()
issues = sort_issues(issues)
return issues

def insert_issue_details(self, issues):
Expand All @@ -231,20 +237,17 @@ def insert_issue_details(self, issues):
line = issue.get('ec_line')
if line is None:
continue
lines = self.get_onset_lines(line)
data_info = self.input_data._dataframe.iloc[lines]
details = ["Sources:"]
for index, row in data_info.iterrows():
details += EventsChecker.get_issue_details(row, index, side_data)
data_info = self.input_data._dataframe.iloc[line]
details = [f"Sources: line:{line} onset:{self.onsets[line]}"] + \
EventsChecker.get_issue_details(data_info, side_data)
issue['details'] = details

@staticmethod
def get_issue_details(data_info, line, side_data):
def get_issue_details(data_info, side_data):
""" Get the source details for the issue.

Parameters:
data_info (pd.Series): The row information from the original tsv.
line (list): A list of lines from the original tsv.
side_data (pd.Series): The sidecar data.

Returns:
Expand All @@ -260,8 +263,7 @@ def get_issue_details(data_info, line, side_data):
col_line = f" => sidecar_source:{EventsChecker.get_hed_source(side_data[col].hed_dict, value)}"
if not col_line and col != 'HED':
continue
col_line = f"\t[line:{line} column_name:{col} column_value:{data_info[col]}]" + col_line
details.append(col_line)
details.append(f"\t[Column_name:{col} Column_value:{data_info[col]}]" + col_line)
return details

@staticmethod
Expand Down
2 changes: 2 additions & 0 deletions hed/tools/analysis/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(self, input_data, hed_schema, extra_defs=None):
self.input_data = input_data
self.def_dict = input_data.get_def_dict(hed_schema, extra_def_dicts=extra_defs)
self.onsets = None # list of onset times or None if not an events file
self.original_index = None # list of original indices of the events
self.base = None # list of strings containing the starts of event processes
self.context = None # list of strings containing the contexts of event processes
self.hed_strings = None # list of HedString objects without the temporal events
Expand Down Expand Up @@ -62,6 +63,7 @@ def _create_event_list(self, input_data):

hed_strings = [HedString(hed_string, self.hed_schema) for hed_string in delay_df.HED]
self.onsets = pd.to_numeric(delay_df.onset, errors='coerce')
self.original_index = pd.to_numeric(delay_df.original_index, errors='coerce')
self.event_list = [[] for _ in range(len(hed_strings))]
onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet.
for event_index, hed in enumerate(hed_strings):
Expand Down
5 changes: 2 additions & 3 deletions hed/validator/reserved_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from hed.errors.error_types import ValidationErrors, TemporalErrors
from hed.errors.error_reporter import ErrorHandler


class ReservedChecker:
_instance = None
_lock = Lock()
Expand Down Expand Up @@ -111,10 +110,10 @@ def check_tag_requirements(self, group, reserved_tags):
min_allowed, max_allowed = self.get_group_requirements(reserved_tags)
if not math.isinf(max_allowed) and len(other_groups) > max_allowed:
return ErrorHandler.format_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, group=group,
group_count=str(len(other_groups)))
group_count=str(len(other_groups)), tag_list=reserved_tags)
if group.is_group and not math.isinf(max_allowed) and min_allowed > len(other_groups):
return ErrorHandler.format_error(ValidationErrors.HED_RESERVED_TAG_GROUP_ERROR, group=group,
group_count=str(len(other_groups)))
group_count=str(len(other_groups)), tag_list=reserved_tags)
return []

def get_group_requirements(self, reserved_tags):
Expand Down