Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion hed/schema/schema_io/base2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from abc import abstractmethod, ABC
from hed.schema import schema_header_util
from hed.schema import hed_schema_constants
from hed.schema.schema_io import df_constants


class SchemaLoader(ABC):
Expand Down Expand Up @@ -127,7 +128,7 @@ def _load(self):

self._parse_data()
self._schema.finalize_dictionaries()

self.fix_extras()
return self._schema

@abstractmethod
Expand Down Expand Up @@ -213,3 +214,26 @@ def find_rooted_entry(tag_entry, schema, loading_merged):
def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed",
                     error_code=HedExceptions.WIKI_DELIMITERS_INVALID):
    """ Format an error for the given line and add it to this loader's fatal errors.

    Parameters:
        line_number: Line number handed to schema_util.format_error.
        line: Content of the offending line, handed to schema_util.format_error.
        warning_message (str): Message describing the problem.
        error_code: Error code to attach; defaults to HedExceptions.WIKI_DELIMITERS_INVALID.

    """
    # NOTE(review): format_error presumably returns a list of issue entries -- confirm in schema_util.
    formatted = schema_util.format_error(line_number, line, warning_message, error_code)
    self.fatal_errors += formatted


def fix_extras(self):
    """ Normalize the schema's extras dataframes once loading has finished.

    Every extras dataframe has its columns renamed using df_constants.EXTRAS_CONVERSIONS.
    Sections listed in df_constants.extras_column_dict are additionally passed through
    SchemaLoader.fix_extra. Does nothing if there is no schema, the schema has no
    'extras' attribute, or the extras are empty.

    """
    schema = self._schema
    if not schema:
        return
    extras = getattr(schema, 'extras', None)
    if not extras:
        return

    # Only existing keys are reassigned, so the dictionary size is stable while iterating.
    for section, frame in extras.items():
        extras[section] = frame.rename(columns=df_constants.EXTRAS_CONVERSIONS)
        if section not in df_constants.extras_column_dict:
            continue
        extras[section] = SchemaLoader.fix_extra(schema, section)

@staticmethod
def fix_extra(schema, key):
    """ Return the extras dataframe for a section with its columns completed, ordered, and rows sorted.

    Parameters:
        schema: Object whose 'extras' mapping holds the dataframe stored under key.
        key: Section key; must be present in both schema.extras and df_constants.extras_column_dict.

    Returns:
        DataFrame: The section's data with the priority columns first (in the order given by
            df_constants.extras_column_dict), remaining columns in sorted order, and rows
            sorted by all columns.

    """
    frame = schema.extras[key]
    required = df_constants.extras_column_dict[key]

    # Missing priority columns are added to the stored dataframe in place, filled with empty strings.
    missing = [column for column in required if column not in frame.columns]
    if missing:
        frame[missing] = ""

    remaining = sorted(set(frame.columns) - set(required))
    ordered = frame[required + remaining]
    return ordered.sort_values(by=list(ordered.columns))
13 changes: 0 additions & 13 deletions hed/schema/schema_io/df2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def load_spreadsheet(cls, filenames=None, schema_as_strings_or_df=None, name="")
"""
loader = cls(filenames, schema_as_strings_or_df=schema_as_strings_or_df, name=name)
hed_schema = loader._load()
cls._fix_extras(hed_schema)
return hed_schema

def _open_file(self):
Expand All @@ -56,19 +55,7 @@ def _open_file(self):

return dataframes

@staticmethod
def _fix_extras(hed_schema):
""" Fixes the extras after loading the schema, to ensure they are in the correct format.

Parameters:
hed_schema (HedSchema): The loaded HedSchema object to fix extras for.

"""
if not hed_schema or not hasattr(hed_schema, 'extras') or not hed_schema.extras:
return

for key, extra in hed_schema.extras.items():
hed_schema.extras[key] = extra.rename(columns=constants.EXTRAS_CONVERSIONS)

def _get_header_attributes(self, file_data):
header_attributes = {}
Expand Down
5 changes: 4 additions & 1 deletion hed/schema/schema_io/df_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@
property_columns = [hed_id, name, type, dcdescription]
prefix_columns = [prefix, namespace, description]
external_annotation_columns = [prefix, id, iri, description]
# Columns for the sources section; the later assignment (which adds description) is the one in effect.
source_columns = [source, link]  # For the sources section
source_columns = [source, link, description]

# Maps each extras section key to the ordered list of columns expected for that section.
extras_column_dict = {PREFIXES_KEY: prefix_columns, EXTERNAL_ANNOTATION_KEY: external_annotation_columns,
                      SOURCES_KEY: source_columns}

# The columns for unit class, value class, and unit modifier
other_columns = [hed_id, name, subclass_of, attributes, dcdescription]
Expand Down
1 change: 1 addition & 0 deletions hed/schema/schema_io/xml2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def _read_sources(self):
for source_element in source_elements:
    # Pull the name, link, and description child values out of each source element.
    source_name = self._get_element_tag_value(source_element, xml_constants.NAME_ELEMENT)
    source_link = self._get_element_tag_value(source_element, xml_constants.LINK_ELEMENT)
    description = self._get_element_tag_value(source_element, xml_constants.DESCRIPTION_ELEMENT)
    # The description must be part of the row: the sources dataframe is built with
    # df_constants.source_columns, which includes the description column, and a key
    # missing from the row would otherwise leave that column as NaN.
    data.append({df_constants.source: source_name, df_constants.link: source_link,
                 df_constants.description: description})
self._schema.extras[df_constants.SOURCES_KEY] = pd.DataFrame(data, columns=df_constants.source_columns)

Expand Down