Skip to content
Merged
2 changes: 1 addition & 1 deletion hed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from hed.schema.hed_schema import HedSchema
from hed.schema.hed_schema_group import HedSchemaGroup
from hed.schema.hed_schema_io import get_schema, get_schema_versions, load_schema, load_schema_version
from hed.schema.hed_schema_io import get_schema, load_schema, load_schema_version


# from hed import errors, models, schema, tools, validator
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def schema_error_hed_duplicate_node(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"


@hed_error(SchemaErrors.HED_SCHEMA_ATTRIBUTE_INVALID)
@hed_error(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
f"or was used outside of it's defined class."
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class SidecarErrors:

class SchemaErrors:
HED_SCHEMA_DUPLICATE_NODE = 'HED_SCHEMA_DUPLICATE_NODE'
HED_SCHEMA_ATTRIBUTE_INVALID = 'HED_SCHEMA_ATTRIBUTE_INVALID'
SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'
HED_SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"


Expand Down
34 changes: 29 additions & 5 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,21 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
has_column_names (bool): True if file has column names.
This value is ignored if you pass in a pandas dataframe.
mapper (ColumnMapper or None): Indicates which columns have HED tags.
See SpreadsheetInput or TabularInput for examples of how to use a built-in ColumnMapper.
name (str or None): Optional field for how this file will report errors.
allow_blank_names(bool): If True, column names can be blank
Notes:
- See SpreadsheetInput or TabularInput for examples of how to use a built-in ColumnMapper.

:raises HedFileError:
- file is blank
- An invalid dataframe was passed with size 0
- An invalid extension was provided
- A duplicate or empty column name appears

:raises OSError:
- Cannot open the indicated file

:raises KeyError:
- The specified worksheet name does not exist
"""
if mapper is None:
mapper = ColumnMapper()
Expand Down Expand Up @@ -94,7 +104,6 @@ def reset_mapper(self, new_mapper):

Parameters:
new_mapper (ColumnMapper): A column mapper to be associated with this base input.

"""
self._mapper = new_mapper
if not self._mapper:
Expand Down Expand Up @@ -200,8 +209,10 @@ def to_excel(self, file):
file (str or file-like): Location to save this base input.

:raises ValueError:
- if empty file object or file cannot be opened.

- if empty file object was passed

:raises OSError:
- Cannot open the indicated file
"""
if not file:
raise ValueError("Empty file name or object passed in to BaseInput.save.")
Expand Down Expand Up @@ -232,6 +243,8 @@ def to_csv(self, file=None):
Returns:
None or str: None if file is given or the contents as a str if file is None.

:raises OSError:
- Cannot open the indicated file
"""
dataframe = self._dataframe
csv_string_if_filename_none = dataframe.to_csv(file, '\t', index=False, header=self._has_column_names)
Expand Down Expand Up @@ -272,6 +285,15 @@ def set_cell(self, row_number, column_number, new_string_obj, tag_form="short_ta

Notes:
Any attribute of a HedTag that returns a string is a valid value of tag_form.

:raises ValueError:
- There is no loaded dataframe

:raises KeyError:
- the indicated row/column does not exist

:raises AttributeError:
- The indicated tag_form is not an attribute of HedTag
"""
if self._dataframe is None:
raise ValueError("No data frame loaded")
Expand All @@ -291,6 +313,8 @@ def get_worksheet(self, worksheet_name=None):
Notes:
If None, returns the first worksheet.

:raises KeyError:
- The specified worksheet name does not exist
"""
if worksheet_name and self._loaded_workbook:
# return self._loaded_workbook.get_sheet_by_name(worksheet_name)
Expand Down
16 changes: 12 additions & 4 deletions hed/models/column_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def __init__(self, sidecar=None, tag_columns=None, column_prefix_dictionary=None
Sidecar column definitions will take precedence if there is a conflict with tag_columns.
column_prefix_dictionary (dict): Dictionary with keys that are column numbers/names and values are HED tag
prefixes to prepend to the tags in that column before processing.

optional_tag_columns (list): A list of ints or strings containing the columns that contain
the HED tags. If the column is otherwise unspecified, convert this column type to HEDTags.
warn_on_missing_column (bool): If True, issue mapping warnings on column names that are missing from
Expand Down Expand Up @@ -89,6 +88,10 @@ def column_prefix_dictionary(self):
def get_transformers(self):
""" Return the transformers to use on a dataframe

Returns:
tuple(dict, list):
dict({str or int: func}): the functions to use to transform each column
need_categorical(list of int): a list of columns to treat as categorical
"""
final_transformers = {}
need_categorical = []
Expand Down Expand Up @@ -144,8 +147,8 @@ def _set_sidecar(self, sidecar):
Parameters:
sidecar (Sidecar or None): the sidecar to use

Returns:

:raises ValueError:
- A sidecar was previously set
"""
if self._sidecar:
raise ValueError("Trying to set a second sidecar on a column mapper.")
Expand All @@ -156,6 +159,11 @@ def _set_sidecar(self, sidecar):

@property
def sidecar_column_data(self):
""" Pass through to get the sidecar ColumnMetadata

Returns:
dict({str:ColumnMetadata}): the column metadata defined by this sidecar
"""
if self._sidecar:
return self._sidecar.column_data

Expand All @@ -168,7 +176,7 @@ def get_tag_columns(self):

Returns:
column_identifiers(list): A list of column numbers or names that are ColumnType.HedTags.
0-based if integer-based, otherwise column name.
0-based if integer-based, otherwise column name.
"""
return [column_entry.column_name for number, column_entry in self._final_column_map.items()
if column_entry.column_type == ColumnType.HEDTags]
Expand Down
14 changes: 14 additions & 0 deletions hed/models/column_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ def source_dict(self):
return self._source[self.column_name]

def get_hed_strings(self):
""" Returns the hed strings for this entry as a series.

Returns:
hed_strings(pd.Series): the hed strings for this series (potentially empty).
"""
if not self.column_type:
return pd.Series(dtype=str)

Expand All @@ -69,6 +74,15 @@ def get_hed_strings(self):
return series

def set_hed_strings(self, new_strings):
""" Sets the hed strings for this entry.

Parameters:
new_strings(pd.Series, dict, or str): The hed strings to set.
This should generally be the return value from get_hed_strings

Returns:
hed_strings(pd.Series): the hed strings for this series (potentially empty).
"""
if new_strings is None:
return False

Expand Down
1 change: 0 additions & 1 deletion hed/models/def_expand_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def __init__(self, hed_schema, known_defs=None, ambiguous_defs=None, errors=None
"""
self.hed_schema = hed_schema
self.ambiguous_defs = ambiguous_defs if ambiguous_defs else {}
self.ambiguous_defs_new = ambiguous_defs if ambiguous_defs else {}
self.errors = errors if errors else {}
self.def_dict = DefinitionDict(known_defs, self.hed_schema)

Expand Down
72 changes: 33 additions & 39 deletions hed/models/definition_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,16 @@ class DefinitionDict:
"""

def __init__(self, def_dicts=None, hed_schema=None):
""" Definitions to be considered a single source. """
""" Definitions to be considered a single source.

Parameters:
def_dicts (str or list or DefinitionDict): DefDict or list of DefDicts/strings or
a single string whose definitions should be added.
hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

:raises TypeError:
- Bad type passed as def_dicts
"""

self.defs = {}
self._label_tag_name = DefTagNames.DEF_KEY
Expand All @@ -26,6 +35,9 @@ def add_definitions(self, def_dicts, hed_schema=None):
Parameters:
def_dicts (list or DefinitionDict): DefDict or list of DefDicts/strings whose definitions should be added.
hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise.

:raises TypeError:
- Bad type passed as def_dicts
"""
if not isinstance(def_dicts, list):
def_dicts = [def_dicts]
Expand All @@ -38,7 +50,7 @@ def add_definitions(self, def_dicts, hed_schema=None):
for definition in def_dict:
self.check_for_definitions(HedString(definition, hed_schema))
else:
print(f"Invalid input type '{type(def_dict)} passed to DefDict. Skipping.")
raise TypeError(f"Invalid type '{type(def_dict)}' passed to DefinitionDict")

def _add_definition(self, def_tag, def_value):
if def_tag in self.defs:
Expand All @@ -59,6 +71,16 @@ def _add_definitions_from_dict(self, def_dict):
self._add_definition(def_tag, def_value)

def get(self, def_name):
""" Get the definition entry for the definition name.

Not case-sensitive

Parameters:
def_name (str): Name of the definition to retrieve.

Returns:
DefinitionEntry: Definition entry for the requested definition.
"""
return self.defs.get(def_name.lower())

def __iter__(self):
Expand All @@ -68,26 +90,20 @@ def __len__(self):
return len(self.defs)

def items(self):
""" Returns the (name, DefinitionEntry) pairs of the stored definitions

Alias for .defs.items()

Returns:
def_entries (dict_items): A view of (str, DefinitionEntry) pairs, one per definition
"""
return self.defs.items()

@property
def issues(self):
"""Returns issues about duplicate definitions."""
return self._issues

def get_def_entry(self, def_name):
""" Get the definition entry for the definition name.

Parameters:
def_name (str): Name of the definition to retrieve.

Returns:
DefinitionEntry: Definition entry for the requested definition.

"""

return self.defs.get(def_name.lower())

def check_for_definitions(self, hed_string_obj, error_handler=None):
""" Check string for definition tags, adding them to self.

Expand All @@ -97,7 +113,6 @@ def check_for_definitions(self, hed_string_obj, error_handler=None):

Returns:
list: List of issues encountered in checking for definitions. Each issue is a dictionary.

"""
def_issues = []
for definition_tag, group in hed_string_obj.find_top_level_tags(anchor_tags={DefTagNames.DEFINITION_KEY}):
Expand Down Expand Up @@ -208,8 +223,8 @@ def _find_group(self, definition_tag, group, error_handler):
def _validate_contents(self, definition_tag, group, error_handler):
issues = []
if group:
for def_tag in group.find_tags({DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY, DefTagNames.DEFINITION_KEY}, recursive=True,
include_groups=0):
def_keys = {DefTagNames.DEF_KEY, DefTagNames.DEF_EXPAND_KEY, DefTagNames.DEFINITION_KEY}
for def_tag in group.find_tags(def_keys, recursive=True, include_groups=0):
issues += ErrorHandler.format_error_with_context(error_handler,
DefinitionErrors.DEF_TAG_IN_DEFINITION,
tag=def_tag,
Expand Down Expand Up @@ -250,27 +265,6 @@ def construct_def_tag(self, hed_tag):
hed_tag._expandable = def_contents
hed_tag._expanded = hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY

def expand_def_tags(self, hed_string_obj):
""" Expands def tags to def-expand tags.

Parameters:
hed_string_obj (HedString): The hed string to process.
"""
# First see if the "def" is found at all. This covers def and def-expand.
hed_string_lower = hed_string_obj.lower()
if self._label_tag_name not in hed_string_lower:
return []

def_issues = []
# We need to check for labels to expand in ALL groups
for def_tag, def_group in hed_string_obj.find_tags(DefTagNames.DEF_KEY, recursive=True):
def_contents = self._get_definition_contents(def_tag)
if def_contents is not None:
def_tag.short_base_tag = DefTagNames.DEF_EXPAND_ORG_KEY
def_group.replace(def_tag, def_contents)

return def_issues

def _get_definition_contents(self, def_tag):
""" Get the contents for a given def tag.

Expand Down
14 changes: 8 additions & 6 deletions hed/models/definition_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,21 @@ def __init__(self, name, contents, takes_value, source_context):
def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag=False):
""" Return a copy of the definition with the tag expanded and the placeholder plugged in.

Returns None if placeholder_value passed when it doesn't take value, or vice versa.

Parameters:
replace_tag (HedTag): The def hed tag to replace with an expanded version
placeholder_value (str or None): If present and required, will replace any pound signs
in the definition contents.
return_copy_of_tag(bool): Set to true for validation

Returns:
str: The expanded def tag name
HedGroup: The contents of this definition(including the def tag itself)
tuple:
str: The expanded def tag name
HedGroup: The contents of this definition(including the def tag itself)

:raises ValueError:
- If a placeholder_value is passed, but this definition doesn't have a placeholder.

- Something internally went wrong with finding the placeholder tag. This should not be possible.
"""
if self.takes_value == (placeholder_value is None):
return None, []
Expand All @@ -49,7 +51,7 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag
name = self.name
if self.contents:
output_group = self.contents
if placeholder_value:
if placeholder_value is not None:
output_group = copy.deepcopy(self.contents)
placeholder_tag = output_group.find_placeholder_tag()
if not placeholder_tag:
Expand All @@ -64,4 +66,4 @@ def get_definition(self, replace_tag, placeholder_value=None, return_copy_of_tag
return f"{DefTagNames.DEF_EXPAND_ORG_KEY}/{name}", output_contents

def __str__(self):
return str(self.contents)
return str(self.contents)
Loading