hed-standard · VisLab · Aug 7, 2023 · Jul 4, 2023 · Jul 10, 2023 · Jul 19, 2023
diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py
@@ -15,12 +15,12 @@
 MAX_MEMORY_CACHE = 20
 
 
-def from_string(schema_string, file_type=".xml", schema_namespace=None):
+def from_string(schema_string, schema_format=".xml", schema_namespace=None):
     """ Create a schema from the given string.
 
     Parameters:
         schema_string (str):         An XML or mediawiki file as a single long string.
-        file_type (str):             The extension(including the .) corresponding to a file source.
+        schema_format (str):         The schema format of the source schema string.
         schema_namespace (str, None):  The name_prefix all tags in this schema will accept.
 
     Returns:
@@ -38,12 +38,12 @@ def from_string(schema_string, file_type=".xml", schema_namespace=None):
         raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string",
                            filename=schema_string)
 
-    if file_type.endswith(".xml"):
+    if schema_format.endswith(".xml"):
         hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string)
-    elif file_type.endswith(".mediawiki"):
+    elif schema_format.endswith(".mediawiki"):
         hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string)
     else:
-        raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=file_type)
+        raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format)
 
     if schema_namespace:
         hed_schema.set_schema_prefix(schema_namespace=schema_namespace)
@@ -75,7 +75,7 @@ def load_schema(hed_path=None, schema_namespace=None):
 
     if is_url:
         file_as_string = schema_util.url_to_string(hed_path)
-        hed_schema = from_string(file_as_string, file_type=os.path.splitext(hed_path.lower())[1])
+        hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1])
     elif hed_path.lower().endswith(".xml"):
         hed_schema = SchemaLoaderXML.load(hed_path)
     elif hed_path.lower().endswith(".mediawiki"):

diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py
@@ -27,7 +27,8 @@ def find_matching_tags(schema1, schema2, output='default', sections=(HedSectionK
         section_dict.update(unequal_entries[section_key])
 
     if output == 'string':
-        return "\n".join([_pretty_print_diff_all(entries, prompt="Found matching node ") for entries in matches.values()])
+        return "\n".join([_pretty_print_diff_all(entries, prompt="Found matching node ")
+                          for entries in matches.values()])
     elif output == 'dict':
         output_dict = {}
         for section_name, section_entries in matches.items():
@@ -38,16 +39,16 @@ def find_matching_tags(schema1, schema2, output='default', sections=(HedSectionK
     return matches
 
 
-def compare_differences(schema1, schema2, output='default', attribute_filter=None, sections=(HedSectionKey.Tags,)):
+def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,)):
     """
     Compare the tags in two schemas, this finds any differences
 
     Parameters:
         schema1 (HedSchema): The first schema to be compared.
         schema2 (HedSchema): The second schema to be compared.
-        output (str): Defaults to returning a set of python object dicts.
+        output (str): 'raw' (default) returns a tuple of python object dicts with raw results.
                       'string' returns a single string
-                      'dict' returns a json style dictionary
+                      'dict' returns a json-style python dictionary that can be converted to JSON
         attribute_filter (str, optional): The attribute to filter entries by.
                                           Entries without this attribute are skipped.
                                           The most common use would be HedKey.InLibrary
@@ -56,11 +57,16 @@ def compare_differences(schema1, schema2, output='default', attribute_filter=Non
                 If None, checks all sections including header, prologue, and epilogue.
 
     Returns:
-    tuple or str: A tuple containing three dictionaries:
+        tuple, str or dict: 
+        - Tuple with dict entries (not_in_schema1, not_in_schema2, unequal_entries).
+        - Formatted string with the output ready for printing.
+        - A Python dictionary with the output ready to be converted to JSON (for web output).
+
+    Notes: The underlying dictionaries are:
         - not_in_schema1(dict): Entries present in schema2 but not in schema1.
         - not_in_schema2(dict): Entries present in schema1 but not in schema2.
         - unequal_entries(dict): Entries that differ between the two schemas.
-        - or a formatted string of the differences
+
     """
     _, not_in_1, not_in_2, unequal_entries = compare_schemas(schema1, schema2, attribute_filter=attribute_filter,
                                                              sections=sections)

diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py
@@ -80,7 +80,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
             Updates the relevant summary.
 
         """
-       
+
         df_new = df.copy()
         summary = dispatcher.summary_dicts.get(self.summary_name, None)
         if not summary:
@@ -130,11 +130,11 @@ def get_details_dict(self, summary):
             this_summary['Categorical columns'][key] = dict(sorted_tuples[:min(num_disp, self.op.max_categorical)])
         return {"Name": this_summary['Name'], "Total events": this_summary["Total events"],
                 "Total files": this_summary['Total files'],
-                "Files": [name for name in this_summary['Files'].keys()],
-                "Specifics": {"Value columns": this_summary['Value columns'].keys(),
+                "Files": list(this_summary['Files'].keys()),
+                "Specifics": {"Value columns": list(this_summary['Value columns']),
                               "Skip columns": this_summary['Skip columns'],
-                              "Value columns": this_summary['Value columns'],
-                              "Categorical columns": this_summary['Categorical columns'],
+                              "Value column summaries": this_summary['Value columns'],
+                              "Categorical column summaries": this_summary['Categorical columns'],
                               "Categorical counts": this_summary['Categorical counts']}}
 
     def merge_all_info(self):
@@ -209,9 +209,9 @@ def _get_dataset_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
         cat_string = self._get_categorical_string(specifics, offset="", indent=indent)
         if cat_string:
             sum_list.append(cat_string)
-        val_cols = specifics.get("Value columns", {})
-        if val_cols:
-            sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset="", indent=indent))
+        val_dict = specifics.get("Value column summaries", {})
+        if val_dict:
+            sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset="", indent=indent))
         return "\n".join(sum_list)
 
     def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
@@ -228,12 +228,12 @@ def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
         """
         sum_list = [f"Total events={result.get('Total events', 0)}"]
         specifics = result.get("Specifics", {})
-        cat_cols = result.get("Categorical columns", {})
-        if cat_cols:
-            sum_list.append(self._get_categorical_string(cat_cols, offset=indent, indent=indent))
-        val_cols = result.get("Value columns", {})
-        if val_cols:
-            sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset=indent, indent=indent))
+        cat_dict = specifics.get("Categorical column summaries", {})
+        if cat_dict:
+            sum_list.append(self._get_categorical_string(cat_dict, offset=indent, indent=indent))
+        val_dict = specifics.get("Value column summaries", {})
+        if val_dict:
+            sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset=indent, indent=indent))
         return "\n".join(sum_list)
 
     def _get_categorical_col(self, entry, count_dict, offset="", indent="   "):

diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py
@@ -129,8 +129,7 @@ def get_details_dict(self, def_gatherer):
         known_defs_summary.update(ambiguous_defs_summary)
         known_defs_summary.update(errors_summary)
         return {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": known_defs_summary}
-
-        return known_defs_summary
+        # return known_defs_summary
 
     def merge_all_info(self):
         """ Create an Object containing the definition summary.

diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
@@ -35,8 +35,8 @@ class SummarizeHedTagsOp(BaseOp):
         },
         "optional_parameters": {
             "append_timecode": bool,
-            "expand_definitions": bool,
-            "expand_context": bool
+            "expand_context": bool,
+            "expand_definitions": bool
         }
     }
 

diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py
@@ -223,7 +223,7 @@ def _run_single_schema_test(self, info, error_code, description,name, error_hand
             for test in tests:
                 schema_string = "\n".join(test)
                 try:
-                    loaded_schema = from_string(schema_string, file_type=".mediawiki")
+                    loaded_schema = from_string(schema_string, schema_format=".mediawiki")
                     issues = loaded_schema.check_compliance()
                 except HedFileError as e:
                     issues = e.issues

diff --git a/tests/schema/test_schema_converters.py b/tests/schema/test_schema_converters.py
@@ -47,7 +47,7 @@ def test_schema_as_string_wiki(self):
         with open(self.wiki_file) as file:
             hed_schema_as_string = "".join([line for line in file])
 
-        string_schema = schema.from_string(hed_schema_as_string, file_type=".mediawiki")
+        string_schema = schema.from_string(hed_schema_as_string, schema_format=".mediawiki")
         self.assertEqual(string_schema, self.hed_schema_wiki)
 
     def test_wikischema2xml(self):