Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions hed/schema/schema_io/base2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def find_rooted_entry(tag_entry, schema, loading_merged):
return None

return rooted_entry
return None

def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed",
error_code=HedExceptions.WIKI_DELIMITERS_INVALID):
Expand All @@ -224,11 +225,10 @@ def fix_extras(self):
for key, extra in self._schema.extras.items():
self._schema.extras[key] = extra.rename(columns=df_constants.EXTRAS_CONVERSIONS)
if key in df_constants.extras_column_dict:
self._schema.extras[key] = self.fix_extra(self._schema, key)
self._schema.extras[key] = self.fix_extra(key)

@staticmethod
def fix_extra(schema, key):
df = schema.extras[key]
def fix_extra(self, key):
df = self._schema.extras[key]
priority_cols = df_constants.extras_column_dict[key]
col_to_add = [col for col in priority_cols if col not in df.columns]
if col_to_add:
Expand Down
3 changes: 2 additions & 1 deletion hed/schema/schema_io/df2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def _parse_data(self):
f"parameter on this exception for more details.", self.name,
issues=self.fatal_errors)
extras = {key: self.input_data[key] for key in constants.DF_EXTRA_SUFFIXES if key in self.input_data}
self._schema.extras = extras
for key, item in extras.items():
self._schema.extras[key] = df_util.merge_dataframes(extras[key], self._schema.extras.get(key, None), key)

def _get_prologue_epilogue(self, file_data):
prologue, epilogue = "", ""
Expand Down
21 changes: 21 additions & 0 deletions hed/schema/schema_io/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,27 @@

UNKNOWN_LIBRARY_VALUE = 0

def merge_dataframes(df1, df2, key):
    """ Create a new dataframe where df2 is merged into df1 and duplicates are eliminated.

    Parameters:
        df1 (pd.DataFrame): Dataframe to use as the destination of the merge.
        df2 (pd.DataFrame or None): Dataframe whose rows are merged into df1.
        key (str): Name identifying the dataframes being merged. It is only used
            to label the error message; it is not used as a column name.

    Returns:
        pd.DataFrame: df1 itself (unchanged) if df2 is None or empty. Otherwise a new
            dataframe holding the distinct rows of df1 and df2, sorted by all columns
            (in df1's column order) and indexed 0..n-1.

    Raises:
        HedFileError: If the two dataframes do not have the same set of column names.
    """
    # Nothing to merge in: hand back the destination untouched.
    if df2 is None or df2.empty:
        return df1
    # Column order may differ, but the set of names must match for a row-wise merge.
    if set(df1.columns) != set(df2.columns):
        raise HedFileError(HedExceptions.BAD_COLUMN_NAMES,
                           f"Both dataframes corresponding to {key} to be merged must have the same columns. "
                           f"df1 columns: {list(df1.columns)} df2 columns: {list(df2.columns)}", "")
    # concat aligns df2's columns to df1's by name.
    combined = pd.concat([df1, df2], ignore_index=True)
    # De-duplicate before sorting so the sort only handles the distinct rows;
    # duplicate rows are identical, so the resulting row order is the same.
    combined = combined.drop_duplicates()
    combined = combined.sort_values(by=list(combined.columns))
    # Sorting and dropping rows leave a scrambled, gapped index; return a clean one.
    return combined.reset_index(drop=True)

def merge_dataframe_dicts(df_dict1, df_dict2, key_column=constants.KEY_COLUMN_NAME):
""" Create a new dictionary of DataFrames where dict2 is merged into dict1.
Expand Down
6 changes: 4 additions & 2 deletions hed/schema/schema_io/wiki2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from hed.errors import error_reporter
from hed.schema.schema_io import wiki_constants, df_constants
from hed.schema.schema_io.base2schema import SchemaLoader
from hed.schema.schema_io.wiki_constants import HedWikiSection, SectionNames
from hed.schema.schema_io.wiki_constants import HedWikiSection, SectionNames, WIKI_EXTRA_DICT
from hed.schema.schema_io import text_util


Expand Down Expand Up @@ -120,7 +120,9 @@ def _parse_extras(self, wiki_lines_by_section):
if not data:
continue
df = pd.DataFrame(data).fillna('').astype(str)
self._schema.extras[extra_key.strip('"')] = df
stripped_key = extra_key.strip("'")
stripped_key = WIKI_EXTRA_DICT.get(stripped_key, stripped_key)
self._schema.extras[stripped_key] = df

@staticmethod
def parse_star_string(s):
Expand Down
6 changes: 6 additions & 0 deletions hed/schema/schema_io/wiki_constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from hed.schema.hed_schema_constants import HedSectionKey
from hed.schema.schema_io import df_constants
START_HED_STRING = "!# start schema"
END_SCHEMA_STRING = "!# end schema"
END_HED_STRING = "!# end hed"
Expand All @@ -17,6 +18,11 @@
PREFIXES_SECTION_ELEMENT = "'''Prefixes'''"
EXTERNAL_ANNOTATION_SECTION_ELEMENT = "'''External annotations'''"

# Maps the names of the extra sections as they appear in the wiki format to
# the df_constants keys under which their dataframes are stored in schema.extras.
# Names without an entry here are used as-is by the wiki loader.
WIKI_EXTRA_DICT = {
    'Sources': df_constants.SOURCES_KEY,
    'Prefixes': df_constants.PREFIXES_KEY,
    'External annotations': df_constants.EXTERNAL_ANNOTATION_KEY,
}


wiki_section_headers = {
HedSectionKey.Tags: START_HED_STRING,
HedSectionKey.UnitClasses: UNIT_CLASS_STRING,
Expand Down
Loading