Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 27 additions & 7 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ def val_error_extra_slashes_spaces(tag, problem_tag):


@hed_error(ValidationErrors.SIDECAR_KEY_MISSING, default_severity=ErrorSeverity.WARNING)
def val_error_sidecar_key_missing(invalid_keys, category_keys):
return f"Category keys '{invalid_keys}' do not exist in column. Valid keys are: {category_keys}"
def val_error_sidecar_key_missing(invalid_keys, category_keys, column_name):
return f"Category keys {invalid_keys} do not exist in sidecar for column '{column_name}'. Valid keys are: {category_keys}"


@hed_error(ValidationErrors.TSV_COLUMN_MISSING, actual_code=ValidationErrors.SIDECAR_KEY_MISSING,
Expand Down Expand Up @@ -470,14 +470,34 @@ def missing_event_type(string, line):
return f"The HED string '{string}' at line {line} has no Event type."


@hed_error(TagQualityErrors.IMPROPER_EVENT_GROUPS, default_severity=ErrorSeverity.WARNING,
actual_code=TagQualityErrors.IMPROPER_EVENT_GROUPS)
def improper_event_groups(event_types, string, line):
return f"The HED string '{string}' at line {line} has multiple events [{event_types}] but is improperly " + \
f"parenthesized so the other tags cannot be uniquely associated with an event."


@hed_error(TagQualityErrors.MISSING_TASK_ROLE, default_severity=ErrorSeverity.WARNING,
actual_code=TagQualityErrors.MISSING_TASK_ROLE)
def missing_task_role(event_type, string, line):
return f"The HED string '{string}' at line {line} with event {event_type} has no Task-event-role type tag."


@hed_error(TagQualityErrors.IMPROPER_TAG_GROUPING, default_severity=ErrorSeverity.WARNING,
actual_code=TagQualityErrors.IMPROPER_TAG_GROUPING)
def improper_tag_grouping(event_types, string, line):
return f"The HED string '{string}' at line {line} has multiple events [{event_types}] but is improperly " + \
f"parenthesized so the other tags cannot be uniquely associated with an event."
@hed_error(TagQualityErrors.AMBIGUOUS_TAG_GROUPING, default_severity=ErrorSeverity.WARNING,
actual_code=TagQualityErrors.AMBIGUOUS_TAG_GROUPING)
def ambiguous_tag_grouping(tags, string, line):
return f"The HED string '{string}' at line {line} has ambiguously grouped tags [{tags}] and needs parentheses."


@hed_error(TagQualityErrors.MISSING_SENSORY_PRESENTATION, default_severity=ErrorSeverity.WARNING,
actual_code=TagQualityErrors.MISSING_SENSORY_PRESENTATION)
def missing_sensory_presentation(string, line):
return f"The HED string '{string}' at line {line} is a Sensory-event but does not have a sensory presentation " + \
f"modality tag such as Visual-presentation or Auditory-presentation."


@hed_error(TagQualityErrors.MISSING_ACTION_TAG, default_severity=ErrorSeverity.WARNING,
actual_code=TagQualityErrors.MISSING_ACTION_TAG)
def missing_action_tag(string, line):
return f"The HED string '{string}' at line {line} is an Agent-action event but does not any Action tags " + \
f"such as Move or Perform."
135 changes: 104 additions & 31 deletions hed/errors/error_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"""

from functools import wraps
import xml.etree.ElementTree as ET
import xml.etree.ElementTree as et
from collections import defaultdict

from hed.errors.error_types import ErrorContext, ErrorSeverity
from hed.errors.known_error_codes import known_error_codes
Expand Down Expand Up @@ -47,7 +48,7 @@ def hed_error(error_type, default_severity=ErrorSeverity.ERROR, actual_code=None

Parameters:
error_type (str): A value from error_types or optionally another value.
default_severity (ErrorSeverity): The default severity for the decorated error.
default_severity (int): The default severity for the decorated error.
actual_code (str): The actual error to report to the outside world.

"""
Expand All @@ -61,7 +62,7 @@ def wrapper(*args, severity=default_severity, **kwargs):

Parameters:
args (args): non keyword args.
severity (ErrorSeverity): Will override the default error value if passed.
severity (int): Will override the default error value if passed.
kwargs (**kwargs): Any keyword args to be passed down to error message function.

Returns:
Expand All @@ -82,7 +83,7 @@ def hed_tag_error(error_type, default_severity=ErrorSeverity.ERROR, has_sub_tag=

Parameters:
error_type (str): A value from error_types or optionally another value.
default_severity (ErrorSeverity): The default severity for the decorated error.
default_severity (int): The default severity for the decorated error.
has_sub_tag (bool): If True, this error message also wants a sub_tag passed down. eg "This" in "This/Is/A/Tag"
actual_code (str): The actual error to report to the outside world.

Expand All @@ -101,7 +102,7 @@ def wrapper(tag, index_in_tag, index_in_tag_end, *args, severity=default_severit
index_in_tag (int): The index into the tag with a problem(usually 0).
index_in_tag_end (int): The last index into the tag with a problem - usually len(tag).
args (args): Any other non keyword args.
severity (ErrorSeverity): Used to include warnings as well as errors.
severity (int): Used to include warnings as well as errors.
kwargs (**kwargs): Any keyword args to be passed down to error message function.

Returns:
Expand Down Expand Up @@ -218,6 +219,19 @@ def reset_error_context(self):
"""
self.error_context = []

def add_context_and_filter(self, issues):
""" Filter out warnings if requested, while adding context to issues.

issues(list):
list: A list containing a single dictionary representing a single error.
"""
if not self._check_for_warnings:
issues[:] = self.filter_issues_by_severity(issues, ErrorSeverity.ERROR)

for error_object in issues:
self._add_context_to_errors(error_object, self.error_context)
self._update_error_with_char_pos(error_object)

def format_error_with_context(self, *args, **kwargs):
error_object = ErrorHandler.format_error(*args, **kwargs)
if self is not None:
Expand Down Expand Up @@ -259,19 +273,6 @@ def format_error(error_type, *args, actual_error=None, **kwargs):

return [error_object]

def add_context_and_filter(self, issues):
""" Filter out warnings if requested, while adding context to issues.

issues(list):
list: A list containing a single dictionary representing a single error.
"""
if not self._check_for_warnings:
issues[:] = self.filter_issues_by_severity(issues, ErrorSeverity.ERROR)

for error_object in issues:
self._add_context_to_errors(error_object, self.error_context)
self._update_error_with_char_pos(error_object)

@staticmethod
def format_error_from_context(error_type, error_context, *args, actual_error=None, **kwargs):
""" Format an error based on the error type.
Expand Down Expand Up @@ -392,6 +393,57 @@ def filter_issues_by_severity(issues_list, severity):
"""
return [issue for issue in issues_list if issue['severity'] <= severity]

@staticmethod
def filter_issues_by_count(issues, count, by_file=False):
""" Filter the issues list to only include the first count issues of each code.

Parameters:
issues (list): A list of dictionaries containing the full issue list.
count (int): The number of issues to keep for each code.
by_file (bool): If True, group by file name.

Returns:
list: A list of dictionaries containing the issue list after filtering by count.
dict: A dictionary with the codes as keys and the number of occurrences as values.

"""
total_seen = {}
file_dicts = {'': {}}
filtered_issues = []
for issue in issues:
seen_codes = file_dicts['']
if by_file and 'ec_filename' in issue:
file_name = issue['ec_filename']
if file_name not in file_dicts:
file_dicts[file_name] = {}
seen_codes = file_dicts[file_name]

code = issue['code']
if code not in seen_codes:
seen_codes[code] = 0
seen_codes[code] += 1
if seen_codes[code] > count:
continue
filtered_issues.append(issue)

return filtered_issues, ErrorHandler.aggregate_code_counts(file_dicts)

@staticmethod
def aggregate_code_counts(file_code_dict):
""" Aggregate the counts of codes across multiple files.

Parameters:
file_code_dict (dict): A dictionary where keys are filenames and values are dictionaries of code counts.

Returns:
dict: A dictionary with the aggregated counts of codes across all files.
"""
total_counts = defaultdict(int)
for file_dict in file_code_dict.values():
for code, count in file_dict.items():
total_counts[code] += count
return dict(total_counts)


def sort_issues(issues, reverse=False):
"""Sort a list of issues by the error context values.
Expand Down Expand Up @@ -425,7 +477,7 @@ def check_for_any_errors(issues_list):
return False


def get_printable_issue_string(issues, title=None, severity=None, skip_filename=True, add_link=False):
def get_printable_issue_string(issues, title=None, severity=None, skip_filename=True, add_link=False, show_details=False):
""" Return a string with issues list flatted into single string, one per line.

Parameters:
Expand All @@ -434,6 +486,7 @@ def get_printable_issue_string(issues, title=None, severity=None, skip_filename=
severity (int): Return only warnings >= severity.
skip_filename (bool): If True, don't add the filename context to the printable string.
add_link (bool): Add a link at the end of message to the appropriate error if True
show_details (bool): If True, show details about the issues.
Returns:
str: A string containing printable version of the issues or ''.

Expand All @@ -442,21 +495,22 @@ def get_printable_issue_string(issues, title=None, severity=None, skip_filename=
issues = ErrorHandler.filter_issues_by_severity(issues, severity)

output_dict = _build_error_context_dict(issues, skip_filename)
issue_string = _error_dict_to_string(output_dict, add_link=add_link)
issue_string = _error_dict_to_string(output_dict, add_link=add_link, show_details=show_details)

if title:
issue_string = title + '\n' + issue_string
return issue_string


def get_printable_issue_string_html(issues, title=None, severity=None, skip_filename=True):
def get_printable_issue_string_html(issues, title=None, severity=None, skip_filename=True, show_details=False):
""" Return a string with issues list as an HTML tree.

Parameters:
issues (list): Issues to print.
title (str): Optional title that will always show up first if present.
severity (int): Return only warnings >= severity.
skip_filename (bool): If True, don't add the filename context to the printable string.
show_details (bool): If True, show details about the issues.

Returns:
str: An HTML string containing the issues or ''.
Expand All @@ -468,10 +522,10 @@ def get_printable_issue_string_html(issues, title=None, severity=None, skip_file

root_element = _create_error_tree(output_dict)
if title:
title_element = ET.Element("h1")
title_element = et.Element("h1")
title_element.text = title
root_element.insert(0, title_element)
return ET.tostring(root_element, encoding='unicode')
return et.tostring(root_element, encoding='unicode')

def iter_errors(issues):
""" An iterator over issues represented as flat dictionaries.
Expand Down Expand Up @@ -556,7 +610,7 @@ def _add_single_error_to_dict(items, root=None, issue_to_add=None):
return root


def _error_dict_to_string(print_dict, add_link=True, level=0):
def _error_dict_to_string(print_dict, add_link=True, show_details=False, level=0):
output = ""
if print_dict is None:
return output
Expand All @@ -569,15 +623,34 @@ def _error_dict_to_string(print_dict, add_link=True, level=0):
if add_link:
link_url = create_doc_link(child['code'])
if link_url:
single_issue_message += f" See... {link_url}"
single_issue_message += "\n" + (level + 1) * "\t" + f" See... {link_url}"
if show_details and "details" in child:
issue_string += _expand_details(child["details"], level + 1)
output += issue_string
continue
output += _format_single_context_string(context[0], context[1], level)
output += _error_dict_to_string(value, add_link, level + 1)
output += _error_dict_to_string(value, add_link, show_details, level + 1)

return output


def _expand_details(details, indent=0):
""" Expand the details of an error into a string.

Parameters:
details (str): The details to expand.
indent (int): The indentation level.

Returns:
str: The expanded details string.
"""
if not details:
return ""
expanded_details = ""
for line in details:
expanded_details += indent * "\t" + line + "\n"
return expanded_details

def _get_context_from_issue(val_issue, skip_filename=True):
""" Extract all the context values from the given issue.

Expand Down Expand Up @@ -653,20 +726,20 @@ def _format_single_context_string(context_type, context, tab_count=0):

def _create_error_tree(error_dict, parent_element=None, add_link=True):
if parent_element is None:
parent_element = ET.Element("ul")
parent_element = et.Element("ul")

for context, value in error_dict.items():
if context == "children":
for child in value:
child_li = ET.SubElement(parent_element, "li")
child_li = et.SubElement(parent_element, "li")
error_prefix = _get_error_prefix(child)
single_issue_message = child["message"]

# Create a link for the error prefix if add_link is True.
if add_link:
link_url = create_doc_link(child['code'])
if link_url:
a_element = ET.SubElement(child_li, "a", href=link_url)
a_element = et.SubElement(child_li, "a", href=link_url)
a_element.text = error_prefix
a_element.tail = " " + single_issue_message
else:
Expand All @@ -675,9 +748,9 @@ def _create_error_tree(error_dict, parent_element=None, add_link=True):
child_li.text = error_prefix + " " + single_issue_message
continue

context_li = ET.SubElement(parent_element, "li")
context_li = et.SubElement(parent_element, "li")
context_li.text = _format_single_context_string(context[0], context[1])
context_ul = ET.SubElement(context_li, "ul")
context_ul = et.SubElement(context_li, "ul")
_create_error_tree(value, context_ul, add_link)

return parent_element
Expand Down
5 changes: 4 additions & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,5 +193,8 @@ class ColumnErrors:

class TagQualityErrors:
MISSING_EVENT_TYPE = "MISSING_EVENT_TYPE"
IMPROPER_EVENT_GROUPS = "IMPROPER_EVENT_GROUPS"
MISSING_TASK_ROLE = "MISSING_TASK_ROLE"
IMPROPER_TAG_GROUPING = "IMPROPER_TAG_GROUPING"
AMBIGUOUS_TAG_GROUPING = "AMBIGUOUS_TAG_GROUPING"
MISSING_ACTION_TAG = "MISSING_ACTION_TAG"
MISSING_SENSORY_PRESENTATION = "MISSING_SENSORY_PRESENTATION"
2 changes: 2 additions & 0 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,14 @@ def series_filtered(self):
"""
if self.onsets is not None:
return filter_series_by_onset(self.series_a, self.onsets)
return None

@property
def onsets(self):
"""Return the onset column if it exists. """
if "onset" in self.columns:
return self._dataframe["onset"]
return None

@property
def needs_sorting(self):
Expand Down
2 changes: 1 addition & 1 deletion hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def replace_ref(text, oldvalue, newvalue="n/a"):
str: The modified string with the ref replaced or removed.
"""
# If it's not n/a, we can just replace directly.
if newvalue != "n/a":
if newvalue != "n/a" and newvalue != "":
return text.replace(oldvalue, newvalue)

def _remover(match):
Expand Down
Loading