Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
MAX_MEMORY_CACHE = 20


def from_string(schema_string, file_type=".xml", schema_namespace=None):
def from_string(schema_string, schema_format=".xml", schema_namespace=None):
""" Create a schema from the given string.

Parameters:
schema_string (str): An XML or mediawiki file as a single long string.
file_type (str): The extension(including the .) corresponding to a file source.
schema_format (str): The schema format of the source schema string.
schema_namespace (str, None): The name_prefix all tags in this schema will accept.

Returns:
Expand All @@ -38,12 +38,12 @@ def from_string(schema_string, file_type=".xml", schema_namespace=None):
raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string",
filename=schema_string)

if file_type.endswith(".xml"):
if schema_format.endswith(".xml"):
hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string)
elif file_type.endswith(".mediawiki"):
elif schema_format.endswith(".mediawiki"):
hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string)
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=file_type)
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format)

if schema_namespace:
hed_schema.set_schema_prefix(schema_namespace=schema_namespace)
Expand Down Expand Up @@ -75,7 +75,7 @@ def load_schema(hed_path=None, schema_namespace=None):

if is_url:
file_as_string = schema_util.url_to_string(hed_path)
hed_schema = from_string(file_as_string, file_type=os.path.splitext(hed_path.lower())[1])
hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1])
elif hed_path.lower().endswith(".xml"):
hed_schema = SchemaLoaderXML.load(hed_path)
elif hed_path.lower().endswith(".mediawiki"):
Expand Down
18 changes: 12 additions & 6 deletions hed/schema/schema_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def find_matching_tags(schema1, schema2, output='default', sections=(HedSectionK
section_dict.update(unequal_entries[section_key])

if output == 'string':
return "\n".join([_pretty_print_diff_all(entries, prompt="Found matching node ") for entries in matches.values()])
return "\n".join([_pretty_print_diff_all(entries, prompt="Found matching node ")
for entries in matches.values()])
elif output == 'dict':
output_dict = {}
for section_name, section_entries in matches.items():
Expand All @@ -38,16 +39,16 @@ def find_matching_tags(schema1, schema2, output='default', sections=(HedSectionK
return matches


def compare_differences(schema1, schema2, output='default', attribute_filter=None, sections=(HedSectionKey.Tags,)):
def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,)):
"""
Compare the tags in two schemas and find any differences.

Parameters:
schema1 (HedSchema): The first schema to be compared.
schema2 (HedSchema): The second schema to be compared.
output (str): Defaults to returning a set of python object dicts.
output (str): 'raw' (default) returns a tuple of python object dicts with raw results.
'string' returns a single string
'dict' returns a json style dictionary
'dict' returns a json-style python dictionary that can be converted to JSON
attribute_filter (str, optional): The attribute to filter entries by.
Entries without this attribute are skipped.
The most common use would be HedKey.InLibrary
Expand All @@ -56,11 +57,16 @@ def compare_differences(schema1, schema2, output='default', attribute_filter=Non
If None, checks all sections including header, prologue, and epilogue.

Returns:
tuple or str: A tuple containing three dictionaries:
tuple, str or dict:
- Tuple with dict entries (not_in_schema1, not_in_schema2, unequal_entries).
- Formatted string with the output ready for printing.
- A Python dictionary with the output ready to be converted to JSON (for web output).

Notes: The underlying dictionaries are:
- not_in_schema1(dict): Entries present in schema2 but not in schema1.
- not_in_schema2(dict): Entries present in schema1 but not in schema2.
- unequal_entries(dict): Entries that differ between the two schemas.
- or a formatted string of the differences

"""
_, not_in_1, not_in_2, unequal_entries = compare_schemas(schema1, schema2, attribute_filter=attribute_filter,
sections=sections)
Expand Down
28 changes: 14 additions & 14 deletions hed/tools/remodeling/operations/summarize_column_values_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
Updates the relevant summary.

"""

df_new = df.copy()
summary = dispatcher.summary_dicts.get(self.summary_name, None)
if not summary:
Expand Down Expand Up @@ -130,11 +130,11 @@ def get_details_dict(self, summary):
this_summary['Categorical columns'][key] = dict(sorted_tuples[:min(num_disp, self.op.max_categorical)])
return {"Name": this_summary['Name'], "Total events": this_summary["Total events"],
"Total files": this_summary['Total files'],
"Files": [name for name in this_summary['Files'].keys()],
"Specifics": {"Value columns": this_summary['Value columns'].keys(),
"Files": list(this_summary['Files'].keys()),
"Specifics": {"Value columns": list(this_summary['Value columns']),
"Skip columns": this_summary['Skip columns'],
"Value columns": this_summary['Value columns'],
"Categorical columns": this_summary['Categorical columns'],
"Value column summaries": this_summary['Value columns'],
"Categorical column summaries": this_summary['Categorical columns'],
"Categorical counts": this_summary['Categorical counts']}}

def merge_all_info(self):
Expand Down Expand Up @@ -209,9 +209,9 @@ def _get_dataset_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
cat_string = self._get_categorical_string(specifics, offset="", indent=indent)
if cat_string:
sum_list.append(cat_string)
val_cols = specifics.get("Value columns", {})
if val_cols:
sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset="", indent=indent))
val_dict = specifics.get("Value column summaries", {})
if val_dict:
sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset="", indent=indent))
return "\n".join(sum_list)

def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
Expand All @@ -228,12 +228,12 @@ def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
"""
sum_list = [f"Total events={result.get('Total events', 0)}"]
specifics = result.get("Specifics", {})
cat_cols = result.get("Categorical columns", {})
if cat_cols:
sum_list.append(self._get_categorical_string(cat_cols, offset=indent, indent=indent))
val_cols = result.get("Value columns", {})
if val_cols:
sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset=indent, indent=indent))
cat_dict = specifics.get("Categorical column summaries", {})
if cat_dict:
sum_list.append(self._get_categorical_string(cat_dict, offset=indent, indent=indent))
val_dict = specifics.get("Value column summaries", {})
if val_dict:
sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset=indent, indent=indent))
return "\n".join(sum_list)

def _get_categorical_col(self, entry, count_dict, offset="", indent=" "):
Expand Down
3 changes: 1 addition & 2 deletions hed/tools/remodeling/operations/summarize_definitions_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ def get_details_dict(self, def_gatherer):
known_defs_summary.update(ambiguous_defs_summary)
known_defs_summary.update(errors_summary)
return {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": known_defs_summary}

return known_defs_summary
# return known_defs_summary

def merge_all_info(self):
""" Create an Object containing the definition summary.
Expand Down
4 changes: 2 additions & 2 deletions hed/tools/remodeling/operations/summarize_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class SummarizeHedTagsOp(BaseOp):
},
"optional_parameters": {
"append_timecode": bool,
"expand_definitions": bool,
"expand_context": bool
"expand_context": bool,
"expand_definitions": bool
}
}

Expand Down
2 changes: 1 addition & 1 deletion spec_tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def _run_single_schema_test(self, info, error_code, description,name, error_hand
for test in tests:
schema_string = "\n".join(test)
try:
loaded_schema = from_string(schema_string, file_type=".mediawiki")
loaded_schema = from_string(schema_string, schema_format=".mediawiki")
issues = loaded_schema.check_compliance()
except HedFileError as e:
issues = e.issues
Expand Down
2 changes: 1 addition & 1 deletion tests/schema/test_schema_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_schema_as_string_wiki(self):
with open(self.wiki_file) as file:
hed_schema_as_string = "".join([line for line in file])

string_schema = schema.from_string(hed_schema_as_string, file_type=".mediawiki")
string_schema = schema.from_string(hed_schema_as_string, schema_format=".mediawiki")
self.assertEqual(string_schema, self.hed_schema_wiki)

def test_wikischema2xml(self):
Expand Down