diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py
index 93c60d8fa..68251c01a 100644
--- a/hed/schema/schema_io/base2schema.py
+++ b/hed/schema/schema_io/base2schema.py
@@ -9,6 +9,7 @@
 from abc import abstractmethod, ABC
 from hed.schema import schema_header_util
 from hed.schema import hed_schema_constants
+from hed.schema.schema_io import df_constants
 
 
 class SchemaLoader(ABC):
@@ -127,7 +128,7 @@ def _load(self):
 
         self._parse_data()
         self._schema.finalize_dictionaries()
-
+        self.fix_extras()
         return self._schema
 
     @abstractmethod
@@ -213,3 +214,37 @@ def find_rooted_entry(tag_entry, schema, loading_merged):
     def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed",
                          error_code=HedExceptions.WIKI_DELIMITERS_INVALID):
         self.fatal_errors += schema_util.format_error(line_number, line, warning_message, error_code)
+
+    def fix_extras(self):
+        """ Normalize the extras dataframes after the schema has been loaded.
+
+        Column names are renamed using df_constants.EXTRAS_CONVERSIONS. Extras whose key is in
+        df_constants.extras_column_dict are then passed through fix_extra.
+        """
+        if not self._schema or not hasattr(self._schema, 'extras') or not self._schema.extras:
+            return
+
+        # Snapshot the items so that reassigning values cannot disturb the iteration.
+        for key, extra in list(self._schema.extras.items()):
+            self._schema.extras[key] = extra.rename(columns=df_constants.EXTRAS_CONVERSIONS)
+            if key in df_constants.extras_column_dict:
+                self._schema.extras[key] = SchemaLoader.fix_extra(self._schema, key)
+
+    @staticmethod
+    def fix_extra(schema, key):
+        """ Return the extras dataframe for key with its expected columns first and its rows sorted.
+
+        Parameters:
+            schema (HedSchema): The schema whose extras dictionary holds the dataframe.
+            key (str): A key of both schema.extras and df_constants.extras_column_dict.
+
+        Returns:
+            pd.DataFrame: A new dataframe; the one stored in schema.extras is not modified.
+
+        """
+        df = schema.extras[key]
+        priority_cols = df_constants.extras_column_dict[key]
+        other_cols = sorted(set(df.columns) - set(priority_cols))
+        # reindex adds any missing expected column (filled with "") and orders the columns in one step.
+        df = df.reindex(columns=priority_cols + other_cols, fill_value="")
+        return df.sort_values(by=list(df.columns))
diff --git a/hed/schema/schema_io/df2schema.py b/hed/schema/schema_io/df2schema.py
index e7f8fa1a7..46a8f1310 100644
--- a/hed/schema/schema_io/df2schema.py
+++ b/hed/schema/schema_io/df2schema.py
@@ -45,7 +45,6 @@ def load_spreadsheet(cls, filenames=None, schema_as_strings_or_df=None, name="")
         """
         loader = cls(filenames, schema_as_strings_or_df=schema_as_strings_or_df, name=name)
         hed_schema = loader._load()
-        cls._fix_extras(hed_schema)
         return hed_schema
 
     def _open_file(self):
@@ -56,19 +55,7 @@ def _open_file(self):
 
         return dataframes
 
-    @staticmethod
-    def _fix_extras(hed_schema):
-        """ Fixes the extras after loading the schema, to ensure they are in the correct format.
 
-        Parameters:
-            hed_schema (HedSchema): The loaded HedSchema object to fix extras for.
-
-        """
-        if not hed_schema or not hasattr(hed_schema, 'extras') or not hed_schema.extras:
-            return
-
-        for key, extra in hed_schema.extras.items():
-            hed_schema.extras[key] = extra.rename(columns=constants.EXTRAS_CONVERSIONS)
 
     def _get_header_attributes(self, file_data):
         header_attributes = {}
diff --git a/hed/schema/schema_io/df_constants.py b/hed/schema/schema_io/df_constants.py
index f1ac63c9f..0464d129c 100644
--- a/hed/schema/schema_io/df_constants.py
+++ b/hed/schema/schema_io/df_constants.py
@@ -82,7 +82,11 @@
 property_columns = [hed_id, name, type, dcdescription]
 prefix_columns = [prefix, namespace, description]
 external_annotation_columns = [prefix, id, iri, description]
-source_columns = [source, link] # For the sources section
+source_columns = [source, link, description]  # For the sources section
+
+# Columns that fix_extra places first in each extras dataframe, keyed by extras section.
+extras_column_dict = {PREFIXES_KEY: prefix_columns, EXTERNAL_ANNOTATION_KEY: external_annotation_columns,
+                      SOURCES_KEY: source_columns}
 
 # The columns for unit class, value class, and unit modifier
 other_columns = [hed_id, name, subclass_of, attributes, dcdescription]
diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py
index d3b06ac9b..554572cc5 100644
--- a/hed/schema/schema_io/xml2schema.py
+++ b/hed/schema/schema_io/xml2schema.py
@@ -104,6 +104,8 @@ def _read_sources(self):
         for source_element in source_elements:
             source_name = self._get_element_tag_value(source_element, xml_constants.NAME_ELEMENT)
             source_link = self._get_element_tag_value(source_element, xml_constants.LINK_ELEMENT)
-            data.append({df_constants.source: source_name, df_constants.link: source_link})
+            description = self._get_element_tag_value(source_element, xml_constants.DESCRIPTION_ELEMENT)
+            data.append({df_constants.source: source_name, df_constants.link: source_link,
+                         df_constants.description: description})
         self._schema.extras[df_constants.SOURCES_KEY] = pd.DataFrame(data, columns=df_constants.source_columns)
 