diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index 60a42d154..11980ed47 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -15,12 +15,12 @@ MAX_MEMORY_CACHE = 20 -def from_string(schema_string, file_type=".xml", schema_namespace=None): +def from_string(schema_string, schema_format=".xml", schema_namespace=None): """ Create a schema from the given string. Parameters: schema_string (str): An XML or mediawiki file as a single long string. - file_type (str): The extension(including the .) corresponding to a file source. + schema_format (str): The schema format of the source schema string. schema_namespace (str, None): The name_prefix all tags in this schema will accept. Returns: @@ -38,12 +38,12 @@ def from_string(schema_string, file_type=".xml", schema_namespace=None): raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string", filename=schema_string) - if file_type.endswith(".xml"): + if schema_format.endswith(".xml"): hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string) - elif file_type.endswith(".mediawiki"): + elif schema_format.endswith(".mediawiki"): hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string) else: - raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=file_type) + raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format) if schema_namespace: hed_schema.set_schema_prefix(schema_namespace=schema_namespace) @@ -75,7 +75,7 @@ def load_schema(hed_path=None, schema_namespace=None): if is_url: file_as_string = schema_util.url_to_string(hed_path) - hed_schema = from_string(file_as_string, file_type=os.path.splitext(hed_path.lower())[1]) + hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1]) elif hed_path.lower().endswith(".xml"): hed_schema = SchemaLoaderXML.load(hed_path) elif hed_path.lower().endswith(".mediawiki"): diff 
--git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py index 5729dd748..fc2caf41e 100644 --- a/hed/schema/schema_compare.py +++ b/hed/schema/schema_compare.py @@ -27,7 +27,8 @@ def find_matching_tags(schema1, schema2, output='default', sections=(HedSectionK section_dict.update(unequal_entries[section_key]) if output == 'string': - return "\n".join([_pretty_print_diff_all(entries, prompt="Found matching node ") for entries in matches.values()]) + return "\n".join([_pretty_print_diff_all(entries, prompt="Found matching node ") + for entries in matches.values()]) elif output == 'dict': output_dict = {} for section_name, section_entries in matches.items(): @@ -38,16 +39,16 @@ def find_matching_tags(schema1, schema2, output='default', sections=(HedSectionK return matches -def compare_differences(schema1, schema2, output='default', attribute_filter=None, sections=(HedSectionKey.Tags,)): +def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,)): """ Compare the tags in two schemas, this finds any differences Parameters: schema1 (HedSchema): The first schema to be compared. schema2 (HedSchema): The second schema to be compared. - output (str): Defaults to returning a set of python object dicts. + output (str): 'raw' (default) returns a tuple of python object dicts with raw results. 'string' returns a single string - 'dict' returns a json style dictionary + 'dict' returns a json-style python dictionary that can be converted to JSON attribute_filter (str, optional): The attribute to filter entries by. Entries without this attribute are skipped. The most common use would be HedKey.InLibrary @@ -56,11 +57,16 @@ def compare_differences(schema1, schema2, output='default', attribute_filter=Non If None, checks all sections including header, prologue, and epilogue. 
Returns: - tuple or str: A tuple containing three dictionaries: + tuple, str or dict: + - Tuple with dict entries (not_in_schema1, not_in_schema2, unequal_entries). + - Formatted string with the output ready for printing. + - A Python dictionary with the output ready to be converted to JSON (for web output). + + Notes: The underlying dictionaries are: - not_in_schema1(dict): Entries present in schema2 but not in schema1. - not_in_schema2(dict): Entries present in schema1 but not in schema2. - unequal_entries(dict): Entries that differ between the two schemas. - - or a formatted string of the differences + """ _, not_in_1, not_in_2, unequal_entries = compare_schemas(schema1, schema2, attribute_filter=attribute_filter, sections=sections) diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 825594aea..94573a137 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -80,7 +80,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): Updates the relevant summary. 
""" - + df_new = df.copy() summary = dispatcher.summary_dicts.get(self.summary_name, None) if not summary: @@ -130,11 +130,11 @@ def get_details_dict(self, summary): this_summary['Categorical columns'][key] = dict(sorted_tuples[:min(num_disp, self.op.max_categorical)]) return {"Name": this_summary['Name'], "Total events": this_summary["Total events"], "Total files": this_summary['Total files'], - "Files": [name for name in this_summary['Files'].keys()], - "Specifics": {"Value columns": this_summary['Value columns'].keys(), + "Files": list(this_summary['Files'].keys()), + "Specifics": {"Value columns": list(this_summary['Value columns']), "Skip columns": this_summary['Skip columns'], - "Value columns": this_summary['Value columns'], - "Categorical columns": this_summary['Categorical columns'], + "Value column summaries": this_summary['Value columns'], + "Categorical column summaries": this_summary['Categorical columns'], "Categorical counts": this_summary['Categorical counts']}} def merge_all_info(self): @@ -209,9 +209,9 @@ def _get_dataset_string(self, result, indent=BaseSummary.DISPLAY_INDENT): cat_string = self._get_categorical_string(specifics, offset="", indent=indent) if cat_string: sum_list.append(cat_string) - val_cols = specifics.get("Value columns", {}) - if val_cols: - sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset="", indent=indent)) + val_dict = specifics.get("Value column summaries", {}) + if val_dict: + sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset="", indent=indent)) return "\n".join(sum_list) def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT): @@ -228,12 +228,12 @@ def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [f"Total events={result.get('Total events', 0)}"] specifics = result.get("Specifics", {}) - cat_cols = result.get("Categorical columns", {}) - if cat_cols: - sum_list.append(self._get_categorical_string(cat_cols, offset=indent, 
indent=indent)) - val_cols = result.get("Value columns", {}) - if val_cols: - sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset=indent, indent=indent)) + cat_dict = specifics.get("Categorical column summaries", {}) + if cat_dict: + sum_list.append(self._get_categorical_string(cat_dict, offset=indent, indent=indent)) + val_dict = specifics.get("Value column summaries", {}) + if val_dict: + sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset=indent, indent=indent)) return "\n".join(sum_list) def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 6be941352..5a1e21804 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -129,8 +129,7 @@ def get_details_dict(self, def_gatherer): known_defs_summary.update(ambiguous_defs_summary) known_defs_summary.update(errors_summary) return {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": known_defs_summary} - - return known_defs_summary + # return known_defs_summary def merge_all_info(self): """ Create an Object containing the definition summary. 
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index f88650ccb..0fcec5411 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -35,8 +35,8 @@ class SummarizeHedTagsOp(BaseOp): }, "optional_parameters": { "append_timecode": bool, - "expand_definitions": bool, - "expand_context": bool + "expand_context": bool, + "expand_definitions": bool } } diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index e48333c5d..2ee73fc9e 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -223,7 +223,7 @@ def _run_single_schema_test(self, info, error_code, description,name, error_hand for test in tests: schema_string = "\n".join(test) try: - loaded_schema = from_string(schema_string, file_type=".mediawiki") + loaded_schema = from_string(schema_string, schema_format=".mediawiki") issues = loaded_schema.check_compliance() except HedFileError as e: issues = e.issues diff --git a/tests/schema/test_schema_converters.py b/tests/schema/test_schema_converters.py index e2e1eb465..5f7c1d121 100644 --- a/tests/schema/test_schema_converters.py +++ b/tests/schema/test_schema_converters.py @@ -47,7 +47,7 @@ def test_schema_as_string_wiki(self): with open(self.wiki_file) as file: hed_schema_as_string = "".join([line for line in file]) - string_schema = schema.from_string(hed_schema_as_string, file_type=".mediawiki") + string_schema = schema.from_string(hed_schema_as_string, schema_format=".mediawiki") self.assertEqual(string_schema, self.hed_schema_wiki) def test_wikischema2xml(self):