Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hed/errors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues
from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references
from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings, SidecarErrors, \
ValidationErrors, ColumnErrors
from .error_types import ErrorContext, ErrorSeverity
Expand Down
5 changes: 5 additions & 0 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,11 @@ def onset_error_offset_before_onset(tag):
return f"Offset tag '{tag}' does not have a matching onset."


@hed_tag_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
def onset_error_same_defs_one_row(tag, def_name):
    """Build the message for a definition name that is reused at one onset time.

    Parameters:
        tag: The offending tag; it is interpolated into the message as text.
        def_name: The definition name that was already used.

    Returns:
        str: The formatted, human-readable error message.
    """
    # NOTE(review): how the decorator wraps/registers this message is defined elsewhere - confirm there.
    message = f"'{tag}' uses name '{def_name}', which was already used at this onset time."
    return message


@hed_tag_error(OnsetErrors.INSET_BEFORE_ONSET, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
def onset_error_inset_before_onset(tag):
    """Build the message for an Inset tag that has no matching onset.

    Parameters:
        tag: The offending tag; it is interpolated into the message as text.

    Returns:
        str: The formatted, human-readable error message.
    """
    message = f"Inset tag '{tag}' does not have a matching onset."
    return message
Expand Down
28 changes: 28 additions & 0 deletions hed/errors/error_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,3 +671,31 @@ def _create_error_tree(error_dict, parent_element=None, add_link=True):
_create_error_tree(value, context_ul, add_link)

return parent_element


def replace_tag_references(list_or_dict):
    """Replace non-primitive leaf values with their string form, in place.

    Use this if you want to save out issues to a file.

    The structure is walked recursively.  Nested dicts and lists are descended
    into; bool, int and float values are left untouched; every other value
    (tags, strings, None, tuples, ...) is replaced by ``str(value)``.  Only
    values are converted - dict keys are never changed.

    The passed-in structure itself is modified.  To leave the original intact,
    pass a deep copy: ``replace_tag_references(copy.deepcopy(list_or_dict))``.
    A shallow ``.copy()`` is not enough, because nested containers would still
    be shared with (and modified in) the original.

    Parameters:
        list_or_dict(list or dict): An arbitrarily nested list/dict structure

    Returns:
        list or dict: The same object that was passed in, so the call can be
            chained.  Anything that is not a list or dict is returned unchanged.
    """
    if isinstance(list_or_dict, dict):
        entries = list_or_dict.items()
    elif isinstance(list_or_dict, list):
        entries = enumerate(list_or_dict)
    else:
        return list_or_dict

    # Re-assigning the value of an existing key/index while iterating is safe:
    # the container never changes size.
    for key, value in entries:
        if isinstance(value, (dict, list)):
            replace_tag_references(value)
        elif not isinstance(value, (bool, float, int)):
            list_or_dict[key] = str(value)

    return list_or_dict
2 changes: 2 additions & 0 deletions hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ class OnsetErrors:
ONSET_TOO_MANY_DEFS = "ONSET_TOO_MANY_DEFS"
ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP"
INSET_BEFORE_ONSET = "INSET_BEFORE_ONSET"
ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW"


class ColumnErrors:
INVALID_COLUMN_REF = "INVALID_COLUMN_REF"
Expand Down
46 changes: 44 additions & 2 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,54 @@ def dataframe_a(self):
@property
def series_a(self):
"""Return the assembled dataframe as a series
Probably a placeholder name.

Returns:
Series: the assembled dataframe with columns merged"""
Series: the assembled dataframe with columns merged
"""
return self.combine_dataframe(self.assemble())

@property
def series_filtered(self):
    """Return the assembled rows with rows sharing an onset merged together.

    Returns:
        list or None: One entry per row of ``series_a``, as produced by
            ``_filter_by_index_list``.  None when there is no onset column.
    """
    onset_column = self.onsets
    if onset_column is None:
        return None

    # Group row numbers by onset time, then merge each group's text.
    rows_by_onset = self._indexed_dict_from_onsets(onset_column.astype(float))
    return self._filter_by_index_list(self.series_a, indexed_dict=rows_by_onset)

@staticmethod
def _indexed_dict_from_onsets(onsets):
current_onset = -1000000.0
tol = 1e-9
from collections import defaultdict
indexed_dict = defaultdict(list)
for i, onset in enumerate(onsets):
if abs(onset - current_onset) > tol:
current_onset = onset
indexed_dict[current_onset].append(i)

return indexed_dict

@staticmethod
def _filter_by_index_list(original_series, indexed_dict):
new_series = ["n/a"] * len(original_series) # Initialize new_series with "n/a"

for onset, indices in indexed_dict.items():
if indices:
first_index = indices[0] # Take the first index of each onset group
# Join the corresponding original series entries and place them at the first index
new_series[first_index] = ",".join([str(original_series[i]) for i in indices])

return new_series

@property
def onsets(self):
    """Return the "onset" column of the underlying dataframe, or None if there is none."""
    if "onset" not in self.columns:
        return None
    return self._dataframe["onset"]

@property
def name(self):
""" Name of the data. """
Expand Down
73 changes: 72 additions & 1 deletion hed/validator/def_validator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from hed.models.hed_string import HedString
from hed.models.hed_tag import HedTag
from hed.models.hed_group import HedGroup
from hed.models.definition_dict import DefinitionDict
from hed.errors.error_types import ValidationErrors
from hed.errors.error_reporter import ErrorHandler
from hed.models.model_constants import DefTagNames
from hed.errors.error_types import OnsetErrors


class DefValidator(DefinitionDict):
""" Handles validating Def/ and Def-expand/.
""" Handles validating Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset

"""
def __init__(self, def_dicts=None, hed_schema=None):
Expand Down Expand Up @@ -128,3 +131,71 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator):
def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag)

return def_issues

def validate_onset_offset(self, hed_string_obj):
    """ Validate the structure of the top level temporal tag groups in a HED string.

    For each (tag, group) pair returned by _find_onset_tags, this checks that the
    group holds exactly one def tag, that it has no more extra children than allowed,
    that the extra child (if any) is a group, and that the def tag passes
    _handle_onset_or_offset.

    Parameters:
        hed_string_obj (HedString): The hed string to check.

    Returns:
        list: A list of issues found in validating the groups (e.g., missing or unknown def names,
            wrong number of groups).
    """
    onset_issues = []
    for found_onset, found_group in self._find_onset_tags(hed_string_obj):
        # A falsy tag ends validation with no issues at all, dropping any collected so far.
        # NOTE(review): presumably this is how the search reports "nothing found" - confirm.
        if not found_onset:
            return []

        def_tags = found_group.find_def_tags()
        if not def_tags:
            onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, found_onset)
            continue

        if len(def_tags) > 1:
            # Report the first def tag, listing every additional one.
            onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS,
                                                      tag=def_tags[0][0],
                                                      tag_list=[tag[0] for tag in def_tags[1:]])
            continue

        # Get all children but def group and onset/offset, then validate #/type of children.
        def_tag, def_group, _ = def_tags[0]
        # With no separate group for the def, the def tag itself is the child to exclude.
        if def_group is None:
            def_group = def_tag
        children = [child for child in found_group.children if
                    def_group is not child and found_onset is not child]
        # One extra child is allowed, except alongside an offset tag, which allows none.
        max_children = 1
        if found_onset.short_base_tag == DefTagNames.OFFSET_ORG_KEY:
            max_children = 0
        if len(children) > max_children:
            onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS,
                                                      def_tag,
                                                      found_group.children)
            continue

        if children:
            # Make this a loop if max_children can be > 1
            child = children[0]
            # No `continue` here: the def tag is still checked below after this issue is added.
            if not isinstance(child, HedGroup):
                onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
                                                          child,
                                                          def_tag)

        # At this point we have either an onset or offset tag and its name
        onset_issues += self._handle_onset_or_offset(def_tag)

    return onset_issues

def _find_onset_tags(self, hed_string_obj):
    """Return the top level tags of hed_string_obj anchored on DefTagNames.TEMPORAL_KEYS.

    Parameters:
        hed_string_obj (HedString): The hed string to search.

    Returns:
        The result of find_top_level_tags; callers iterate it as (tag, group) pairs.
    """
    anchors = DefTagNames.TEMPORAL_KEYS
    return hed_string_obj.find_top_level_tags(anchor_tags=anchors)

def _handle_onset_or_offset(self, def_tag):
def_name, _, placeholder = def_tag.extension.partition('/')

def_entry = self.defs.get(def_name.lower())
if def_entry is None:
return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
if bool(def_entry.takes_value) != bool(placeholder):
return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
has_placeholder=bool(def_entry.takes_value))

return []
8 changes: 2 additions & 6 deletions hed/validator/hed_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,17 @@
from hed.models import HedTag
from hed.validator.tag_validator import TagValidator
from hed.validator.def_validator import DefValidator
from hed.validator.onset_validator import OnsetValidator


class HedValidator:
""" Top level validation of HED strings. """

def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, definitions_allowed=False):
def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False):
""" Constructor for the HedValidator class.

Parameters:
hed_schema (HedSchema or HedSchemaGroup): HedSchema object to use for validation.
def_dicts(DefinitionDict or list or dict): the def dicts to use for validation
run_full_onset_checks(bool): If True, check for matching onset/offset tags
definitions_allowed(bool): If False, flag definitions found as errors
"""
super().__init__()
Expand All @@ -33,8 +31,6 @@ def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, defin

self._tag_validator = TagValidator(hed_schema=self._hed_schema)
self._def_validator = DefValidator(def_dicts, hed_schema)
self._onset_validator = OnsetValidator(def_dict=self._def_validator,
run_full_onset_checks=run_full_onset_checks)
self._definitions_allowed = definitions_allowed

def validate(self, hed_string, allow_placeholders, error_handler=None):
Expand Down Expand Up @@ -80,7 +76,7 @@ def run_full_string_checks(self, hed_string):
issues = []
issues += self._validate_tags_in_hed_string(hed_string)
issues += self._validate_groups_in_hed_string(hed_string)
issues += self._onset_validator.validate_onset_offset(hed_string)
issues += self._def_validator.validate_onset_offset(hed_string)
return issues

def _validate_groups_in_hed_string(self, hed_string_obj):
Expand Down
86 changes: 27 additions & 59 deletions hed/validator/onset_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
class OnsetValidator:
""" Validates onset/offset pairs. """

def __init__(self, def_dict, run_full_onset_checks=True):
self._defs = def_dict
def __init__(self):
self._onsets = {}
self._run_full_onset_checks = run_full_onset_checks

def validate_onset_offset(self, hed_string_obj):
""" Validate onset/offset
def validate_temporal_relations(self, hed_string_obj):
""" Validate onset/offset/inset tag relations

Parameters:
hed_string_obj (HedString): The hed string to check.
Expand All @@ -22,76 +20,46 @@ def validate_onset_offset(self, hed_string_obj):
list: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names).
"""
onset_issues = []
for found_onset, found_group in self._find_onset_tags(hed_string_obj):
if not found_onset:
used_def_names = set()
for temporal_tag, temporal_group in self._find_temporal_tags(hed_string_obj):
if not temporal_tag:
return []

def_tags = found_group.find_def_tags()
def_tags = temporal_group.find_def_tags(include_groups=0)
if not def_tags:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, found_onset)
continue

if len(def_tags) > 1:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS,
tag=def_tags[0][0],
tag_list=[tag[0] for tag in def_tags[1:]])
def_tag = def_tags[0]
def_name = def_tag.extension
if def_name.lower() in used_def_names:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, tag=temporal_tag,
def_name=def_name)
continue

# Get all children but def group and onset/offset, then validate #/type of children.
def_tag, def_group, _ = def_tags[0]
if def_group is None:
def_group = def_tag
children = [child for child in found_group.children if
def_group is not child and found_onset is not child]
max_children = 1
if found_onset.short_base_tag == DefTagNames.OFFSET_ORG_KEY:
max_children = 0
if len(children) > max_children:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS,
def_tag,
found_group.children)
continue

if children:
# Make this a loop if max_children can be > 1
child = children[0]
if not isinstance(child, HedGroup):
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
child,
def_tag)
used_def_names.add(def_tag.extension.lower())

# At this point we have either an onset or offset tag and it's name
onset_issues += self._handle_onset_or_offset(def_tag, found_onset)
onset_issues += self._handle_onset_or_offset(def_tag, temporal_tag)

return onset_issues

def _find_onset_tags(self, hed_string_obj):
def _find_temporal_tags(self, hed_string_obj):
return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS)

def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
is_onset = onset_offset_tag.short_base_tag == DefTagNames.ONSET_ORG_KEY
full_def_name = def_tag.extension
def_name, _, placeholder = def_tag.extension.partition('/')

def_entry = self._defs.get(def_name)
if def_entry is None:
return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
if bool(def_entry.takes_value) != bool(placeholder):
return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
has_placeholder=bool(def_entry.takes_value))

if self._run_full_onset_checks:
if is_onset:
# onset can never fail as it implies an offset
self._onsets[full_def_name.lower()] = full_def_name
else:
is_offset = onset_offset_tag.short_base_tag == DefTagNames.OFFSET_ORG_KEY
if full_def_name.lower() not in self._onsets:
if is_offset:
return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=def_tag)
else:
return ErrorHandler.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=def_tag)
elif is_offset:
del self._onsets[full_def_name.lower()]
if is_onset:
# onset can never fail as it implies an offset
self._onsets[full_def_name.lower()] = full_def_name
else:
is_offset = onset_offset_tag.short_base_tag == DefTagNames.OFFSET_ORG_KEY
if full_def_name.lower() not in self._onsets:
if is_offset:
return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=def_tag)
else:
return ErrorHandler.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=def_tag)
elif is_offset:
del self._onsets[full_def_name.lower()]

return []
1 change: 0 additions & 1 deletion hed/validator/sidecar_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
sidecar_def_dict = sidecar.get_def_dict(hed_schema=self._schema, extra_def_dicts=extra_def_dicts)
hed_validator = HedValidator(self._schema,
def_dicts=sidecar_def_dict,
run_full_onset_checks=False,
definitions_allowed=True)

issues += sidecar._extract_definition_issues
Expand Down
Loading