From 4aa8704d10467030871416b01ee3094cf019a868 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 9 Aug 2023 18:33:06 -0500 Subject: [PATCH 1/2] Add HTML as a possible output format for printing errors Remove unicode quote characters from a few errors Tweak how errors appear when printed --- hed/errors/error_messages.py | 6 +- hed/errors/error_reporter.py | 236 ++++++++++++++++++++-------- hed/schema/schema_io/wiki2schema.py | 2 +- 3 files changed, 172 insertions(+), 72 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 3591bae83..5cded3402 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -165,13 +165,13 @@ def val_error_sidecar_key_missing(invalid_key, category_keys): @hed_tag_error(ValidationErrors.HED_DEF_EXPAND_INVALID, actual_code=ValidationErrors.DEF_EXPAND_INVALID) def val_error_bad_def_expand(tag, actual_def, found_def): - return f"A data-recording’s Def-expand tag does not match the given definition." + \ + return f"A data-recording's Def-expand tag does not match the given definition." + \ f"Tag: '{tag}'. Actual Def: {actual_def}. Found Def: {found_def}" @hed_tag_error(ValidationErrors.HED_DEF_UNMATCHED, actual_code=ValidationErrors.DEF_INVALID) def val_error_def_unmatched(tag): - return f"A data-recording’s Def tag cannot be matched to definition. Tag: '{tag}'" + return f"A data-recording's Def tag cannot be matched to definition. Tag: '{tag}'" @hed_tag_error(ValidationErrors.HED_DEF_VALUE_MISSING, actual_code=ValidationErrors.DEF_INVALID) @@ -186,7 +186,7 @@ def val_error_def_value_extra(tag): @hed_tag_error(ValidationErrors.HED_DEF_EXPAND_UNMATCHED, actual_code=ValidationErrors.DEF_EXPAND_INVALID) def val_error_def_expand_unmatched(tag): - return f"A data-recording’s Def-expand tag cannot be matched to definition. Tag: '{tag}'" + return f"A data-recording's Def-expand tag cannot be matched to definition. Tag: '{tag}'" @hed_tag_error(ValidationErrors.HED_DEF_EXPAND_VALUE_MISSING, actual_code=ValidationErrors.DEF_EXPAND_INVALID) diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 338e4fefc..6eebe38f3 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -5,8 +5,10 @@ """ from functools import wraps +import xml.etree.ElementTree as ET import copy from hed.errors.error_types import ErrorContext, ErrorSeverity +from hed.errors.known_error_codes import known_error_codes error_functions = {} @@ -32,6 +34,7 @@ ErrorContext.ROW ] + def _register_error_function(error_type, wrapper_func): if error_type in error_functions: raise KeyError(f"{error_type} defined more than once.") @@ -96,7 +99,7 @@ def wrapper(tag, index_in_tag, index_in_tag_end, *args, severity=default_severit Parameters: tag (HedTag): The hed tag object with the problem, index_in_tag (int): The index into the tag with a problem(usually 0), - index_in_tag_end (int): The last index into the tag with a problem(usually len(tag), + index_in_tag_end (int): The last index into the tag with a problem - usually len(tag), args (args): Any other non keyword args. severity (ErrorSeverity): Used to include warnings as well as errors. kwargs (**kwargs): Any keyword args to be passed down to error message function. @@ -324,13 +327,8 @@ def _add_context_to_errors(error_object, error_context_to_add): @staticmethod def _create_error_object(error_type, base_message, severity, **kwargs): - if severity == ErrorSeverity.ERROR: - error_prefix = f"{error_type}: " - else: - error_prefix = f"{error_type} (Warning): " - error_message = error_prefix + base_message error_object = {'code': error_type, - 'message': error_message, + 'message': base_message, 'severity': severity } @@ -417,7 +415,6 @@ def sort_issues(issues, reverse=False): Returns: list: The sorted list of issues.""" def _get_keys(d): - from hed import HedString result = [] for key in default_sort_list: if key in int_sort_list: @@ -431,7 +428,16 @@ def _get_keys(d): return issues -def get_printable_issue_string(issues, title=None, severity=None, skip_filename=True): +def check_for_any_errors(issues_list): + """Returns True if there are any errors with a severity of warning""" + for issue in issues_list: + if issue['severity'] < ErrorSeverity.WARNING: + return True + + return False + + +def get_printable_issue_string(issues, title=None, severity=None, skip_filename=True, add_link=False): """ Return a string with issues list flatted into single string, one per line. Parameters: @@ -439,41 +445,122 @@ def get_printable_issue_string(issues, title=None, severity=None, skip_filename= title (str): Optional title that will always show up first if present(even if there are no validation issues). severity (int): Return only warnings >= severity. skip_filename (bool): If true, don't add the filename context to the printable string. - + add_link (bool): Add a link at the end of message to the appropriate error if True Returns: str: A string containing printable version of the issues or ''. """ - last_used_error_context = [] + if severity is not None: + issues = ErrorHandler.filter_issues_by_severity(issues, severity) + + output_dict = _build_error_context_dict(issues, skip_filename) + issue_string = _error_dict_to_string(output_dict, add_link=add_link) + + if title: + issue_string = title + '\n' + issue_string + return issue_string + + +def get_printable_issue_string_html(issues, title=None, severity=None, skip_filename=True): + """ Return a string with issues list as an HTML tree. + Parameters: + issues (list): Issues to print. + title (str): Optional title that will always show up first if present. + severity (int): Return only warnings >= severity. + skip_filename (bool): If true, don't add the filename context to the printable string. + + Returns: + str: An HTML string containing the issues or ''. + """ if severity is not None: issues = ErrorHandler.filter_issues_by_severity(issues, severity) - issue_string = "" + output_dict = _build_error_context_dict(issues, skip_filename) + + root_element = _create_error_tree(output_dict) + if title: + title_element = ET.Element("h1") + title_element.text = title + root_element.insert(0, title_element) + return ET.tostring(root_element, encoding='unicode') + + +def create_doc_link(error_code): + """If error code is a known code, return a documentation url for it + + Parameters: + error_code(str): A HED error code + + Returns: + url(str or None): The URL if it's a valid code + """ + if error_code in known_error_codes["hed_validation_errors"] \ + or error_code in known_error_codes["schema_validation_errors"]: + modified_error_code = error_code.replace("_", "-").lower() + return f"https://hed-specification.readthedocs.io/en/latest/Appendix_B.html#{modified_error_code}" + return None + + +def _build_error_context_dict(issues, skip_filename): + """Builds the context -> error dictionary for an entire list of issues + + Returns: + dict: A nested dictionary structure with a "children" key at each level for unrelated children. + """ + output_dict = None for single_issue in issues: single_issue_context = _get_context_from_issue(single_issue, skip_filename) - context_string, tab_string = _get_context_string(single_issue_context, last_used_error_context) + output_dict = _add_single_error_to_dict(single_issue_context, output_dict, single_issue) - issue_string += context_string - single_issue_message = tab_string + single_issue['message'] - if "\n" in single_issue_message: - single_issue_message = single_issue_message.replace("\n", "\n" + tab_string) - issue_string += f"{single_issue_message}\n" - last_used_error_context = single_issue_context.copy() + return output_dict - if issue_string: - issue_string += "\n" - if title: - issue_string = title + '\n' + issue_string - return issue_string +def _add_single_error_to_dict(items, root=None, issue_to_add=None): + """ Build a nested dictionary out of the context lists -def check_for_any_errors(issues_list): - for issue in issues_list: - if issue['severity'] < ErrorSeverity.WARNING: - return True + Parameters: + items (list): A list of error contexts + root (dict, optional): An existing nested dictionary structure to update. + issue_to_add (dict, optional): The issue to add at this level of context - return False + Returns: + dict: A nested dictionary structure with a "children" key at each level for unrelated children. + """ + if root is None: + root = {"children": []} + + current_dict = root + for item in items: + # Navigate to the next level if the item already exists, or create a new level + next_dict = current_dict.get(item, {"children": []}) + current_dict[item] = next_dict + current_dict = next_dict + + if issue_to_add: + current_dict["children"].append(issue_to_add) + + return root + + +def _error_dict_to_string(print_dict, add_link=True, level=0): + output = "" + for context, value in print_dict.items(): + if context == "children": + for child in value: + single_issue_message = child["message"] + issue_string = level * "\t" + _get_error_prefix(child) + issue_string += f"{single_issue_message}\n" + if add_link: + link_url = create_doc_link(child['code']) + if link_url: + single_issue_message += f" See... {link_url}" + output += issue_string + continue + output += _format_single_context_string(context[0], context[1], level) + output += _error_dict_to_string(value, add_link, level + 1) + + return output def _get_context_from_issue(val_issue, skip_filename=True): @@ -488,17 +575,38 @@ def _get_context_from_issue(val_issue, skip_filename=True): """ single_issue_context = [] - for key in val_issue: + for key, value in val_issue.items(): if skip_filename and key == ErrorContext.FILE_NAME: continue + if key == ErrorContext.HED_STRING: + value = value.get_original_hed_string() if key.startswith("ec_"): - single_issue_context.append((key, val_issue[key])) + single_issue_context.append((key, str(value))) return single_issue_context +def _get_error_prefix(single_issue): + """Returns the prefix for the error message based on severity and error code. + + Parameters: + single_issue(dict): A single issue object + + Returns: + error_prefix(str): the prefix to use + """ + severity = single_issue.get('severity', ErrorSeverity.ERROR) + error_code = single_issue['code'] + + if severity == ErrorSeverity.ERROR: + error_prefix = f"{error_code}: " + else: + error_prefix = f"{error_code}: (Warning) " + return error_prefix + + def _format_single_context_string(context_type, context, tab_count=0): - """ Return the human readable form of a single context tuple. + """ Return the human-readable form of a single context tuple. Parameters: context_type (str): The context type of this entry. @@ -510,8 +618,6 @@ def _format_single_context_string(context_type, context, tab_count=0): """ tab_string = tab_count * '\t' - if context_type == ErrorContext.HED_STRING: - context = context.get_original_hed_string() error_types = { ErrorContext.FILE_NAME: f"\nErrors in file '{context}'", ErrorContext.SIDECAR_COLUMN_NAME: f"Column '{context}':", @@ -530,39 +636,33 @@ def _format_single_context_string(context_type, context, tab_count=0): return context_string -def _get_context_string(single_issue_context, last_used_context): - """ Convert a single context list into the final human readable output form. - - Parameters: - single_issue_context (list): A list of tuples containing the context(context_type, context) - last_used_context (list): A list of tuples containing the last drawn context. - - Returns: - str: The full string of context(potentially multiline) to add before the error. - str: The tab string to add to the front of any message line with this context. +def _create_error_tree(error_dict, parent_element=None, add_link=True): + if parent_element is None: + parent_element = ET.Element("ul") + + for context, value in error_dict.items(): + if context == "children": + for child in value: + child_li = ET.SubElement(parent_element, "li") + error_prefix = _get_error_prefix(child) + single_issue_message = child["message"] + + # Create a link for the error prefix if add_link is True + if add_link: + link_url = create_doc_link(child['code']) + if link_url: + a_element = ET.SubElement(child_li, "a", href=link_url) + a_element.text = error_prefix + a_element.tail = " " + single_issue_message + else: + child_li.text = error_prefix + " " + single_issue_message + else: + child_li.text = error_prefix + " " + single_issue_message + continue - Notes: - The last used context is always the same format as single_issue_context and used - so that the error handling can only add the parts that have changed. + context_li = ET.SubElement(parent_element, "li") + context_li.text = _format_single_context_string(context[0], context[1]) + context_ul = ET.SubElement(context_li, "ul") + _create_error_tree(value, context_ul, add_link) - """ - context_string = "" - tab_count = 0 - found_difference = False - for i, context_tuple in enumerate(single_issue_context): - (context_type, context) = context_tuple - if len(last_used_context) > i and not found_difference: - last_drawn = last_used_context[i] - # Was drawn, and hasn't changed. - if last_drawn == context_tuple: - if context_type not in no_tab_context: - tab_count += 1 - continue - - context_string += _format_single_context_string(context_type, context, tab_count) - found_difference = True - if context_type not in no_tab_context: - tab_count += 1 - - tab_string = '\t' * tab_count - return context_string, tab_string + return parent_element diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py index 1a996fba9..500181d55 100644 --- a/hed/schema/schema_io/wiki2schema.py +++ b/hed/schema/schema_io/wiki2schema.py @@ -494,7 +494,7 @@ def _add_fatal_error(self, line_number, line, warning_message="Schema term is em {'code': error_code, ErrorContext.ROW: line_number, ErrorContext.LINE: line, - "message": f"{error_code}: {warning_message}" + "message": f"{warning_message}" } ) From c4650eb26f7e5f2ba1c57a052936f1fa2e01cfef Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 9 Aug 2023 18:34:51 -0500 Subject: [PATCH 2/2] Add missing file --- hed/errors/known_error_codes.py | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 hed/errors/known_error_codes.py diff --git a/hed/errors/known_error_codes.py b/hed/errors/known_error_codes.py new file mode 100644 index 000000000..b72e84708 --- /dev/null +++ b/hed/errors/known_error_codes.py @@ -0,0 +1,45 @@ +known_error_codes = { + "hed_validation_errors": [ + "CHARACTER_INVALID", + "COMMA_MISSING", + "DEF_EXPAND_INVALID", + "DEF_INVALID", + "DEFINITION_INVALID", + "NODE_NAME_EMPTY", + "ONSET_OFFSET_INSET_ERROR", + "PARENTHESES_MISMATCH", + "PLACEHOLDER_INVALID", + "REQUIRED_TAG_MISSING", + "SIDECAR_BRACES_INVALID", + "SIDECAR_INVALID", + "SIDECAR_KEY_MISSING", + "STYLE_WARNING", + "TAG_EMPTY", + "TAG_EXPRESSION_REPEATED", + "TAG_EXTENDED", + "TAG_EXTENSION_INVALID", + "TAG_GROUP_ERROR", + "TAG_INVALID", + "TAG_NAMESPACE_PREFIX_INVALID", + "TAG_NOT_UNIQUE", + "TAG_REQUIRES_CHILD", + "TILDES_UNSUPPORTED", + "UNITS_INVALID", + "UNITS_MISSING", + "VALUE_INVALID", + "VERSION_DEPRECATED" + ], + "schema_validation_errors": [ + "SCHEMA_ATTRIBUTE_INVALID", + "SCHEMA_CHARACTER_INVALID", + "SCHEMA_DUPLICATE_NODE", + "SCHEMA_HEADER_INVALID", + "SCHEMA_LIBRARY_INVALID", + "SCHEMA_SECTION_MISSING", + "SCHEMA_VERSION_INVALID", + "WIKI_DELIMITERS_INVALID", + "WIKI_LINE_START_INVALID", + "WIKI_SEPARATOR_INVALID", + "XML_SYNTAX_INVALID" + ] +} \ No newline at end of file