hed-standard · VisLab · Apr 8, 2025 · Apr 8, 2025 · Apr 8, 2025
diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py
@@ -9,6 +9,7 @@
 from abc import abstractmethod, ABC
 from hed.schema import schema_header_util
 from hed.schema import hed_schema_constants
+from hed.schema.schema_io import df_constants
 
 
 class SchemaLoader(ABC):
@@ -127,7 +128,7 @@ def _load(self):
 
         self._parse_data()
         self._schema.finalize_dictionaries()
-
+        self.fix_extras()
         return self._schema
 
     @abstractmethod
@@ -213,3 +214,26 @@ def find_rooted_entry(tag_entry, schema, loading_merged):
     def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed",
                          error_code=HedExceptions.WIKI_DELIMITERS_INVALID):
         self.fatal_errors += schema_util.format_error(line_number, line, warning_message, error_code)
+
+
+    def fix_extras(self):
+        """ Rename extras columns via EXTRAS_CONVERSIONS, then apply fix_extra to keys in extras_column_dict. """
+        schema = self._schema
+        if not schema or not getattr(schema, 'extras', None):
+            return
+        for key in list(schema.extras):
+            schema.extras[key] = schema.extras[key].rename(columns=df_constants.EXTRAS_CONVERSIONS)
+            if key in df_constants.extras_column_dict:
+                schema.extras[key] = SchemaLoader.fix_extra(schema, key)
+
+    @staticmethod
+    def fix_extra(schema, key):
+        """ Return schema.extras[key] with priority columns first (missing ones added as ""), rows sorted. """
+        frame = schema.extras[key]
+        required = df_constants.extras_column_dict[key]
+        absent = [name for name in required if name not in frame.columns]
+        if absent:
+            frame[absent] = ""
+        trailing = sorted(set(frame.columns) - set(required))
+        ordered = frame[required + trailing]
+        return ordered.sort_values(by=list(ordered.columns))
diff --git a/hed/schema/schema_io/df2schema.py b/hed/schema/schema_io/df2schema.py
@@ -45,7 +45,6 @@ def load_spreadsheet(cls, filenames=None, schema_as_strings_or_df=None, name="")
         """
         loader = cls(filenames, schema_as_strings_or_df=schema_as_strings_or_df, name=name)
         hed_schema = loader._load()
-        cls._fix_extras(hed_schema)
         return hed_schema
 
     def _open_file(self):
@@ -56,19 +55,7 @@ def _open_file(self):
 
         return dataframes
 
-    @staticmethod
-    def _fix_extras(hed_schema):
-        """ Fixes the extras after loading the schema, to ensure they are in the correct format.
 
-        Parameters:
-            hed_schema (HedSchema): The loaded HedSchema object to fix extras for.
-
-        """
-        if not hed_schema or not hasattr(hed_schema, 'extras') or not hed_schema.extras:
-            return
-
-        for key, extra in hed_schema.extras.items():
-            hed_schema.extras[key] = extra.rename(columns=constants.EXTRAS_CONVERSIONS)
 
     def _get_header_attributes(self, file_data):
         header_attributes = {}

diff --git a/hed/schema/schema_io/df_constants.py b/hed/schema/schema_io/df_constants.py
@@ -82,7 +82,10 @@
 property_columns = [hed_id, name, type, dcdescription]
 prefix_columns = [prefix, namespace, description]
 external_annotation_columns = [prefix, id, iri, description]
-source_columns = [source, link]  # For the sources section
+source_columns = [source, link, description]  # For the sources section
+
+extras_column_dict = {PREFIXES_KEY: prefix_columns, EXTERNAL_ANNOTATION_KEY: external_annotation_columns,
+                      SOURCES_KEY: source_columns}
 
 # The columns for unit class, value class, and unit modifier
 other_columns = [hed_id, name, subclass_of, attributes, dcdescription]

diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py
@@ -104,6 +104,9 @@ def _read_sources(self):
         for source_element in source_elements:
             source_name = self._get_element_tag_value(source_element, xml_constants.NAME_ELEMENT)
             source_link = self._get_element_tag_value(source_element, xml_constants.LINK_ELEMENT)
-            data.append({df_constants.source: source_name, df_constants.link: source_link})
+            description = self._get_element_tag_value(source_element, xml_constants.DESCRIPTION_ELEMENT)
+            # Store the description too: source_columns has a description column that is otherwise left unfilled.
+            data.append({df_constants.source: source_name, df_constants.link: source_link,
+                         df_constants.description: description})
         self._schema.extras[df_constants.SOURCES_KEY] = pd.DataFrame(data, columns=df_constants.source_columns)