Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,21 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
has_column_names (bool): True if file has column names.
This value is ignored if you pass in a pandas dataframe.
mapper (ColumnMapper or None): Indicates which columns have HED tags.
See SpreadsheetInput or TabularInput for examples of how to use a built-in ColumnMapper.
name (str or None): Optional field for how this file will report errors.
allow_blank_names(bool): If True, column names can be blank
Notes:
- See SpreadsheetInput or TabularInput for examples of how to use a built-in ColumnMapper.

:raises HedFileError:
- file is blank
- An invalid dataframe was passed with size 0
- An invalid extension was provided
- A duplicate or empty column name appears

:raises OSError:
- Cannot open the indicated file

:raises KeyError:
- The specified worksheet name does not exist
"""
if mapper is None:
mapper = ColumnMapper()
Expand Down Expand Up @@ -94,7 +104,6 @@ def reset_mapper(self, new_mapper):

Parameters:
new_mapper (ColumnMapper): A column mapper to be associated with this base input.

"""
self._mapper = new_mapper
if not self._mapper:
Expand Down Expand Up @@ -200,8 +209,10 @@ def to_excel(self, file):
file (str or file-like): Location to save this base input.

:raises ValueError:
- if empty file object or file cannot be opened.

- if empty file object was passed

:raises OSError:
- Cannot open the indicated file
"""
if not file:
raise ValueError("Empty file name or object passed in to BaseInput.save.")
Expand Down Expand Up @@ -232,6 +243,8 @@ def to_csv(self, file=None):
Returns:
None or str: None if file is given or the contents as a str if file is None.

:raises OSError:
- Cannot open the indicated file
"""
dataframe = self._dataframe
csv_string_if_filename_none = dataframe.to_csv(file, '\t', index=False, header=self._has_column_names)
Expand Down Expand Up @@ -272,6 +285,15 @@ def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_ta

Notes:
Any attribute of a HedTag that returns a string is a valid value of tag_form.

:raises ValueError:
- There is not a loaded dataframe

:raises KeyError:
- the indicated row/column does not exist

:raises AttributeError:
- The indicated tag_form is not an attribute of HedTag
"""
if self._dataframe is None:
raise ValueError("No data frame loaded")
Expand All @@ -291,6 +313,8 @@ def get_worksheet(self, worksheet_name=None):
Notes:
If None, returns the first worksheet.

:raises KeyError:
- The specified worksheet name does not exist
"""
if worksheet_name and self._loaded_workbook:
# return self._loaded_workbook.get_sheet_by_name(worksheet_name)
Expand Down
16 changes: 12 additions & 4 deletions hed/models/column_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None
Sidecar column definitions will take precedence if there is a conflict with tag_columns.
column_prefix_dictionary (dict): Dictionary with keys that are column numbers/names and values are HED tag
prefixes to prepend to the tags in that column before processing.

optional_tag_columns (list): A list of ints or strings containing the columns that contain
the HED tags. If the column is otherwise unspecified, convert this column type to HEDTags.
warn_on_missing_column (bool): If True, issue mapping warnings on column names that are missing from
Expand Down Expand Up @@ -89,6 +88,10 @@ def column_prefix_dictionary(self):
def get_transformers(self):
""" Return the transformers to use on a dataframe

Returns:
tuple(dict, list):
dict({str or int: func}): the functions to use to transform each column
need_categorical(list of int): a list of columns to treat as categorical
"""
final_transformers = {}
need_categorical = []
Expand Down Expand Up @@ -144,8 +147,8 @@ def _set_sidecar(self, sidecar):
Parameters:
sidecar (Sidecar or None): the sidecar to use

Returns:

:raises ValueError:
- A sidecar was previously set
"""
if self._sidecar:
raise ValueError("Trying to set a second sidecar on a column mapper.")
Expand All @@ -156,6 +159,11 @@ def _set_sidecar(self, sidecar):

@property
def sidecar_column_data(self):
""" Pass through to get the sidecar ColumnMetadata

Returns:
dict({str:ColumnMetadata}): the column metadata defined by this sidecar
"""
if self._sidecar:
return self._sidecar.column_data

Expand All @@ -168,7 +176,7 @@ def get_tag_columns(self):

Returns:
column_identifiers(list): A list of column numbers or names that are ColumnType.HedTags.
0-based if integer-based, otherwise column name.
0-based if integer-based, otherwise column name.
"""
return [column_entry.column_name for number, column_entry in self._final_column_map.items()
if column_entry.column_type == ColumnType.HEDTags]
Expand Down
14 changes: 14 additions & 0 deletions hed/models/column_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ def source_dict(self):
return self._source[self.column_name]

def get_hed_strings(self):
""" Returns the hed strings for this entry as a series.

Returns:
hed_strings(pd.Series): the hed strings for this series (potentially empty).
"""
if not self.column_type:
return pd.Series(dtype=str)

Expand All @@ -69,6 +74,15 @@ def get_hed_strings(self):
return series

def set_hed_strings(self, new_strings):
""" Sets the hed strings for this entry.

Parameters:
new_strings(pd.Series, dict, or str): The hed strings to set.
This should generally be the return value from get_hed_strings

Returns:
hed_strings(pd.Series): the hed strings for this series.(potentially empty)
"""
if new_strings is None:
return False

Expand Down
1 change: 0 additions & 1 deletion hed/models/def_expand_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None
"""
self.hed_schema = hed_schema
self.ambiguous_defs = ambiguous_defs if ambiguous_defs else {}
self.ambiguous_defs_new = ambiguous_defs if ambiguous_defs else {}
self.errors = errors if errors else {}
self.def_dict = DefinitionDict(known_defs, self.hed_schema)

Expand Down
72 changes: 33 additions & 39 deletions hed/models/definition_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,16 @@ class DefinitionDict:
"""

def __init__(self, def_dicts=None, hed_schema=None):
""" Definitions to be considered a single source. """
""" Definitions to be considered a single source.

Parameters:
def_dicts (str or list or DefinitionDict): DefDict or list of DefDicts/strings or
a single string whose definitions should be added.
hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

:raises TypeError:
- Bad type passed as def_dicts
"""

self.defs = {}
self._label_tag_name = DefTagNames.DEF_KEY
Expand All @@ -26,6 +35,9 @@ def add_definitions(self, def_dicts, hed_schema=None):
Parameters:
def_dicts (list or DefinitionDict): DefDict or list of DefDicts/strings whose definitions should be added.
hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

:raises TypeError:
- Bad type passed as def_dicts
"""
if not isinstance(def_dicts, list):
def_dicts = [def_dicts]
Expand All @@ -38,7 +50,7 @@ def add_definitions(self, def_dicts, hed_schema=None):
for definition in def_dict:
self.check_for_definitions(HedString(definition, hed_schema))
else:
print(f"Invalid input type '{type(def_dict)} passed to DefDict. Skipping.")
raise TypeError("Invalid type '{type(def_dict)}' passed to DefinitionDict")

def _add_definition(self, def_tag, def_value):
if def_tag in self.defs:
Expand All @@ -59,6 +71,16 @@ def _add_definitions_from_dict(self, def_dict):
self._add_definition(def_tag, def_value)

def get(self, def_name):
    """ Look up a definition entry by name, ignoring case.

    The name is lowercased before it is used as the key into self.defs.

    Parameters:
        def_name (str): Name of the definition to retrieve.

    Returns:
        DefinitionEntry or None: The entry stored under the lowercased name,
            or None if no such key exists.
    """
    lookup_key = def_name.lower()
    return self.defs.get(lookup_key)

def __iter__(self):
Expand All @@ -68,26 +90,20 @@ def __len__(self):
return len(self.defs)

def items(self):
    """ Return the (name, entry) pairs of the stored definitions.

    Alias for .defs.items()

    Returns:
        def_entries: A view of (str, DefinitionEntry) pairs taken from self.defs.
    """
    definitions = self.defs
    return definitions.items()

@property
def issues(self):
    """ Issues recorded about duplicate definitions.

    Returns:
        The object stored in self._issues, returned as-is (not copied).
    """
    recorded = self._issues
    return recorded

def get_def_entry(self, def_name):
    """ Fetch the entry stored for a definition name.

    The lookup is done on the lowercased name, so it is not case-sensitive.

    Parameters:
        def_name (str): Name of the definition to retrieve.

    Returns:
        DefinitionEntry or None: The entry for the requested definition,
            or None if self.defs has no such key.
    """
    normalized_name = def_name.lower()
    return self.defs.get(normalized_name)

def check_for_definitions(self, hed_string_obj, error_handler=None):
""" Check string for definition tags, adding them to self.

Expand All @@ -97,7 +113,6 @@ def check_for_definitions(self, hed_string_obj, error_handler=None):

Returns:
list: List of issues encountered in checking for definitions. Each issue is a dictionary.

"""
def_issues = []
for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}):
Expand Down Expand Up @@ -208,8 +223,8 @@ def _find_group(self, definition_tag, group, error_handler):
def _validate_contents(self, definition_tag, group, error_handler):
issues = []
if group:
for def_tag in group.find_tags({DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY, DefTagNames.DEFINITION_KEY}, recursive=True,
include_groups=0):
def_keys = {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY, DefTagNames.DEFINITION_KEY}
for def_tag in group.find_tags(def_keys, recursive=True, include_groups=0):
issues += ErrorHandler.format_error_with_context(error_handler,
DefinitionErrors.DEF_TAG_IN_DEFINITION,
tag=def_tag,
Expand Down Expand Up @@ -250,27 +265,6 @@ def construct_def_tag(self, hed_tag):
hed_tag._expandable = def_contents
hed_tag._expanded = hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY

def expand_def_tags(self, hed_string_obj):
    """ Expand def tags in place into def-expand tags.

    Parameters:
        hed_string_obj (HedString): The hed string to process.

    Returns:
        list: The issues list. Nothing in this method appends to it,
            so it is always empty.
    """
    # Cheap pre-check: skip the tag search entirely when the label text
    # does not occur anywhere in the lowercased string.
    if self._label_tag_name not in hed_string_obj.lower():
        return []

    issues = []
    # Labels must be expanded in ALL groups, hence the recursive search.
    for tag, parent_group in hed_string_obj.find_tags(DefTagNames.DEF_KEY, recursive=True):
        expansion = self._get_definition_contents(tag)
        if expansion is None:
            continue
        tag.short_base_tag = DefTagNames.DEF_EXPAND_ORG_KEY
        parent_group.replace(tag, expansion)

    return issues

def _get_definition_contents(self, def_tag):
""" Get the contents for a given def tag.

Expand Down
14 changes: 8 additions & 6 deletions hed/models/definition_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,21 @@ def __init__(self, name, contents, takes_value, source_context):
def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag=False):
""" Return a copy of the definition with the tag expanded and the placeholder plugged in.

Returns None if placeholder_value passed when it doesn't take value, or vice versa.

Parameters:
replace_tag (HedTag): The def hed tag to replace with an expanded version
placeholder_value (str or None): If present and required, will replace any pound signs
in the definition contents.
return_copy_of_tag(bool): Set to true for validation

Returns:
str: The expanded def tag name
HedGroup: The contents of this definition(including the def tag itself)
tuple:
str: The expanded def tag name
HedGroup: The contents of this definition(including the def tag itself)

:raises ValueError:
- If a placeholder_value is passed, but this definition doesn't have a placeholder.

- Something internally went wrong with finding the placeholder tag. This should not be possible.
"""
if self.takes_value == (placeholder_value is None):
return None, []
Expand All @@ -49,7 +51,7 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag
name = self.name
if self.contents:
output_group = self.contents
if placeholder_value:
if placeholder_value is not None:
output_group = copy.deepcopy(self.contents)
placeholder_tag = output_group.find_placeholder_tag()
if not placeholder_tag:
Expand All @@ -64,4 +66,4 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag
return f"{DefTagNames.DEF_EXPAND_ORG_KEY}/{name}", output_contents

def __str__(self):
    """ Return the string form of this entry's contents. """
    return str(self.contents)
23 changes: 6 additions & 17 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
expand_defs: bool
Expand any def tags found
Returns:
tuple: A list of HedStrings or a list of lists of HedStrings, DefinitionDict

tuple:
hed_strings(list of HedStrings): A list of HedStrings or a list of lists of HedStrings
def_dict(DefinitionDict): The definitions from this Sidecar
"""
if isinstance(sidecar, str):
sidecar = Sidecar(sidecar)
Expand Down Expand Up @@ -131,23 +132,11 @@ def _expand_defs(hed_string, hed_schema, def_dict):
return str(HedString(hed_string, hed_schema, def_dict).expand_defs())


def _get_matching_value(tags):
    """ Return the single concrete extension shared by the given tags.

    Extensions equal to "#" are ignored when collecting the distinct values.

    Parameters:
        tags (iterable): Objects exposing an `extension` attribute.

    Returns:
        str or None: "#" if no tag has an extension other than "#",
            the extension itself if exactly one distinct value remains,
            or None if the tags disagree (more than one distinct value).
    """
    unique_values = {tag.extension for tag in tags if tag.extension != "#"}
    if not unique_values:
        return "#"

    if len(unique_values) > 1:
        return None

    return next(iter(unique_values))


def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None):
"""
Processes a list of HED strings according to a given HED schema, using known definitions and ambiguous definitions.
""" Processes a list of HED strings according to a given HED schema,
using known definitions and ambiguous definitions.

Args:
Parameters:
hed_strings (list or pd.Series): A list of HED strings to process.
hed_schema (HedSchema): The schema to use
known_defs (DefinitionDict or list or str, optional):
Expand Down
Loading