Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ def val_error_CURLY_BRACE_UNSUPPORTED_HERE(tag, problem_tag):
return (f"Curly braces are only permitted in sidecars, fully wrapping text in place of a tag. "
f"Invalid character '{problem_tag}' in tag '{tag}'")


@hed_error(ValidationErrors.ONSETS_OUT_OF_ORDER, default_severity=ErrorSeverity.WARNING)
def val_error_ONSETS_OUT_OF_ORDER():
    """ Return the message text for the ONSETS_OUT_OF_ORDER validation issue.

    Registered through hed_error with a default severity of WARNING.

    Returns:
        str: The fixed message; it takes no parameters.
    """
    message = "Onsets need to be temporally increasing for most downstream tools to work."
    return message


@hed_error(ValidationErrors.COMMA_MISSING)
def val_error_comma_missing(tag):
    """ Return the message text for the COMMA_MISSING validation issue.

    Parameters:
        tag: The tag after which a comma is missing; interpolated into the message.

    Returns:
        str: The formatted message naming the tag.
    """
    message = f"Comma missing after - '{tag}'"
    return message
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class ValidationErrors:
INVALID_TAG_CHARACTER = 'invalidTagCharacter'

CURLY_BRACE_UNSUPPORTED_HERE = "CURLY_BRACE_UNSUPPORTED_HERE"

ONSETS_OUT_OF_ORDER = "ONSETS_OUT_OF_ORDER"


class SidecarErrors:
Expand Down
2 changes: 1 addition & 1 deletion hed/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .column_metadata import ColumnMetadata, ColumnType
from .definition_dict import DefinitionDict
from .definition_entry import DefinitionEntry
from .expression_parser import QueryParser
from .query_handler import QueryHandler
from .hed_group import HedGroup
from .spreadsheet_input import SpreadsheetInput
from .hed_string import HedString
Expand Down
8 changes: 8 additions & 0 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,14 @@ def onsets(self):
if "onset" in self.columns:
return self._dataframe["onset"]

@property
def needs_sorting(self):
    """ Report whether this has an onset column whose values are out of order.

    Returns:
        bool: True if there is an onset column and its values, compared as floats,
            are not monotonically increasing. False if they are already ordered
            or if there is no onset column.
    """
    onsets = self.onsets
    if onsets is None:
        # No onset column: nothing to sort. Return an explicit False rather than
        # falling off the end and returning None.
        return False
    # Compare numerically so that values held as text (e.g. "10.0" vs "2.0")
    # are ordered by magnitude rather than lexically.
    return not onsets.astype(float).is_monotonic_increasing

@property
def name(self):
""" Name of the data. """
Expand Down
41 changes: 25 additions & 16 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from hed.models.definition_dict import DefinitionDict


def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_columns=True,
shrink_defs=False, expand_defs=True):
def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, shrink_defs=False, expand_defs=True):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file with.

Args:
Expand All @@ -20,8 +19,6 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
If str, will attempt to load as a version if it doesn't have a valid extension.
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
join_columns: bool
If True, join all HED columns into one.
shrink_defs: bool
Shrink any def-expand tags found
expand_defs: bool
Expand All @@ -41,19 +38,12 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
if sidecar:
def_dict = sidecar.get_def_dict(hed_schema=hed_schema, extra_def_dicts=extra_def_dicts)

if join_columns:
if expand_defs:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict
elif shrink_defs:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict
else:
return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict
if expand_defs:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict
elif shrink_defs:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict
else:
return [[HedString(x, hed_schema, def_dict).expand_defs() if expand_defs
else HedString(x, hed_schema, def_dict).shrink_defs() if shrink_defs
else HedString(x, hed_schema, def_dict)
for x in text_file_row] for text_file_row in tabular_file.dataframe_a.itertuples(index=False)], \
def_dict
return [HedString(x, hed_schema, def_dict) for x in tabular_file.series_a], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
Expand Down Expand Up @@ -151,3 +141,22 @@ def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs
from hed.models.def_expand_gather import DefExpandGatherer
def_gatherer = DefExpandGatherer(hed_schema, known_defs, ambiguous_defs)
return def_gatherer.process_def_expands(hed_strings)


def sort_dataframe_by_onsets(df):
    """ Return a copy of the dataframe with rows ordered by the numeric value of the onset column.

    The onset values are converted to float only to build the sort key; the values
    stored in the returned 'onset' column keep their original form. Rows with equal
    onsets keep their original relative order. The input dataframe is not modified.

    Parameters:
        df(pd.Dataframe): Dataframe to sort

    Returns:
        The sorted dataframe, or the original dataframe if it didn't have an onset column.
    """
    if "onset" not in df.columns:
        return df

    sort_key = '_temp_onset_sort'
    # assign() works on a copy, so the caller's dataframe is left untouched.
    with_key = df.assign(**{sort_key: df['onset'].astype(float)})
    # mergesort is stable: rows that share an onset are not shuffled relative to each other.
    ordered = with_key.sort_values(by=sort_key, kind="mergesort")
    return ordered.drop(columns=[sort_key])
Loading