From 9852e849c3e412166988a03f18734b34ad9e207f Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Sun, 6 Aug 2023 07:25:53 -0500
Subject: [PATCH 1/3] Wrote experimental code to compare schemas

---
 hed/schema/schema_compare.py                               | 7 ++++---
 .../remodeling/operations/summarize_column_values_op.py    | 6 +++---
 .../remodeling/operations/summarize_definitions_op.py      | 3 +--
 hed/tools/remodeling/operations/summarize_hed_tags_op.py   | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py
index 49d5c72b6..57bdb656e 100644
--- a/hed/schema/schema_compare.py
+++ b/hed/schema/schema_compare.py
@@ -20,7 +20,8 @@ def find_matching_tags(schema1, schema2, return_string=False):
         section_dict.update(unequal_entries[section_key])
 
     if return_string:
-        return "\n".join([pretty_print_diff_all(entries, prompt="Found matching node ") for entries in matches.values()])
+        return "\n".join([pretty_print_diff_all(entries,
+                                                prompt="Found matching node ") for entries in matches.values()])
     return matches
 
 
@@ -64,7 +65,7 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section
         attribute_filter (str, optional): The attribute to filter entries by.
                                           Entries without this attribute are skipped.
                                           If it evaluates to False, no filtering is performed.
-        sections(list): the list of sections to compare.  By default, just the tags section.
+        sections(tuple): the tuple of sections to compare.  By default, just the tags section.
 
     Returns:
     tuple: A tuple containing four dictionaries:
@@ -199,4 +200,4 @@ def pretty_print_missing_all(entries, schema_name):
         output.append(f"'{key}' not in {schema_name}':")
         output += pretty_print_entry(entry)
 
-    return "\n".join(output)
\ No newline at end of file
+    return "\n".join(output)
diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py
index 825594aea..b53d8aa38 100644
--- a/hed/tools/remodeling/operations/summarize_column_values_op.py
+++ b/hed/tools/remodeling/operations/summarize_column_values_op.py
@@ -80,7 +80,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
             Updates the relevant summary.
 
         """
-       
+
         df_new = df.copy()
         summary = dispatcher.summary_dicts.get(self.summary_name, None)
         if not summary:
@@ -133,8 +133,8 @@ def get_details_dict(self, summary):
                 "Files": [name for name in this_summary['Files'].keys()],
                 "Specifics": {"Value columns": this_summary['Value columns'].keys(),
                               "Skip columns": this_summary['Skip columns'],
-                              "Value columns": this_summary['Value columns'],
-                              "Categorical columns": this_summary['Categorical columns'],
+                              "Value column summaries": this_summary['Value columns'],
+                              "Categorical column summaries": this_summary['Categorical columns'],
                               "Categorical counts": this_summary['Categorical counts']}}
 
     def merge_all_info(self):
diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py
index 6be941352..5a1e21804 100644
--- a/hed/tools/remodeling/operations/summarize_definitions_op.py
+++ b/hed/tools/remodeling/operations/summarize_definitions_op.py
@@ -129,8 +129,7 @@ def get_details_dict(self, def_gatherer):
         known_defs_summary.update(ambiguous_defs_summary)
         known_defs_summary.update(errors_summary)
         return {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": known_defs_summary}
-        
-        return known_defs_summary
+        # return known_defs_summary
 
     def merge_all_info(self):
         """ Create an Object containing the definition summary.
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
index f88650ccb..0fcec5411 100644
--- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
@@ -35,8 +35,8 @@ class SummarizeHedTagsOp(BaseOp):
         },
         "optional_parameters": {
             "append_timecode": bool,
-            "expand_definitions": bool,
-            "expand_context": bool
+            "expand_context": bool,
+            "expand_definitions": bool
         }
     }
 

From 3c71d262c751cd78eeaae9d38a4bc4098e5d9510 Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Sun, 6 Aug 2023 09:39:56 -0500
Subject: [PATCH 2/3] Corrected column value summary output

---
 hed/schema/schema_compare.py                  | 18 ++++++++++-----
 .../operations/summarize_column_values_op.py  | 22 +++++++++----------
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py
index 5729dd748..fc2caf41e 100644
--- a/hed/schema/schema_compare.py
+++ b/hed/schema/schema_compare.py
@@ -27,7 +27,8 @@ def find_matching_tags(schema1, schema2, output='default', sections=(HedSectionK
         section_dict.update(unequal_entries[section_key])
 
     if output == 'string':
-        return "\n".join([_pretty_print_diff_all(entries, prompt="Found matching node ") for entries in matches.values()])
+        return "\n".join([_pretty_print_diff_all(entries, prompt="Found matching node ")
+                          for entries in matches.values()])
     elif output == 'dict':
         output_dict = {}
         for section_name, section_entries in matches.items():
@@ -38,16 +39,16 @@ def find_matching_tags(schema1, schema2, output='default', sections=(HedSectionK
     return matches
 
 
-def compare_differences(schema1, schema2, output='default', attribute_filter=None, sections=(HedSectionKey.Tags,)):
+def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,)):
     """
     Compare the tags in two schemas, this finds any differences
 
     Parameters:
         schema1 (HedSchema): The first schema to be compared.
         schema2 (HedSchema): The second schema to be compared.
-        output (str): Defaults to returning a set of python object dicts.
+        output (str): 'raw' (default) returns a tuple of python object dicts with raw results.
                       'string' returns a single string
-                      'dict' returns a json style dictionary
+                      'dict' returns a json-style python dictionary that can be converted to JSON
         attribute_filter (str, optional): The attribute to filter entries by.
                                           Entries without this attribute are skipped.
                                           The most common use would be HedKey.InLibrary
@@ -56,11 +57,16 @@ def compare_differences(schema1, schema2, output='default', attribute_filter=Non
                 If None, checks all sections including header, prologue, and epilogue.
 
     Returns:
-    tuple or str: A tuple containing three dictionaries:
+        tuple, str or dict: The form depends on the output parameter:
+        - Tuple with dict entries (not_in_schema1, not_in_schema2, unequal_entries).
+        - Formatted string with the output ready for printing.
+        - A Python dictionary with the output ready to be converted to JSON (for web output).
+
+    Notes: The underlying dictionaries are:
         - not_in_schema1(dict): Entries present in schema2 but not in schema1.
         - not_in_schema2(dict): Entries present in schema1 but not in schema2.
         - unequal_entries(dict): Entries that differ between the two schemas.
-        - or a formatted string of the differences
+
     """
     _, not_in_1, not_in_2, unequal_entries = compare_schemas(schema1, schema2, attribute_filter=attribute_filter,
                                                              sections=sections)
diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py
index b53d8aa38..94573a137 100644
--- a/hed/tools/remodeling/operations/summarize_column_values_op.py
+++ b/hed/tools/remodeling/operations/summarize_column_values_op.py
@@ -130,8 +130,8 @@ def get_details_dict(self, summary):
             this_summary['Categorical columns'][key] = dict(sorted_tuples[:min(num_disp, self.op.max_categorical)])
         return {"Name": this_summary['Name'], "Total events": this_summary["Total events"],
                 "Total files": this_summary['Total files'],
-                "Files": [name for name in this_summary['Files'].keys()],
-                "Specifics": {"Value columns": this_summary['Value columns'].keys(),
+                "Files": list(this_summary['Files'].keys()),
+                "Specifics": {"Value columns": list(this_summary['Value columns']),
                               "Skip columns": this_summary['Skip columns'],
                               "Value column summaries": this_summary['Value columns'],
                               "Categorical column summaries": this_summary['Categorical columns'],
@@ -209,9 +209,9 @@ def _get_dataset_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
         cat_string = self._get_categorical_string(specifics, offset="", indent=indent)
         if cat_string:
             sum_list.append(cat_string)
-        val_cols = specifics.get("Value columns", {})
-        if val_cols:
-            sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset="", indent=indent))
+        val_dict = specifics.get("Value column summaries", {})
+        if val_dict:
+            sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset="", indent=indent))
         return "\n".join(sum_list)
 
     def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
@@ -228,12 +228,12 @@ def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT):
         """
         sum_list = [f"Total events={result.get('Total events', 0)}"]
         specifics = result.get("Specifics", {})
-        cat_cols = result.get("Categorical columns", {})
-        if cat_cols:
-            sum_list.append(self._get_categorical_string(cat_cols, offset=indent, indent=indent))
-        val_cols = result.get("Value columns", {})
-        if val_cols:
-            sum_list.append(ColumnValueSummary._get_value_string(val_cols, offset=indent, indent=indent))
+        cat_dict = specifics.get("Categorical column summaries", {})
+        if cat_dict:
+            sum_list.append(self._get_categorical_string(cat_dict, offset=indent, indent=indent))
+        val_dict = specifics.get("Value column summaries", {})
+        if val_dict:
+            sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset=indent, indent=indent))
         return "\n".join(sum_list)
 
     def _get_categorical_col(self, entry, count_dict, offset="", indent="   "):

From 6ea3d7bb03a5d52c9d5fc4cfae26949f572e12c1 Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Mon, 7 Aug 2023 07:47:31 -0500
Subject: [PATCH 3/3] Minor corrections to docs

---
 hed/schema/hed_schema_io.py            | 12 ++++++------
 spec_tests/test_errors.py              |  2 +-
 tests/schema/test_schema_converters.py |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py
index 60a42d154..11980ed47 100644
--- a/hed/schema/hed_schema_io.py
+++ b/hed/schema/hed_schema_io.py
@@ -15,12 +15,12 @@
 MAX_MEMORY_CACHE = 20
 
 
-def from_string(schema_string, file_type=".xml", schema_namespace=None):
+def from_string(schema_string, schema_format=".xml", schema_namespace=None):
     """ Create a schema from the given string.
 
     Parameters:
         schema_string (str):         An XML or mediawiki file as a single long string.
-        file_type (str):             The extension(including the .) corresponding to a file source.
+        schema_format (str):         The format of the schema string, given as an extension ('.xml' or '.mediawiki').
         schema_namespace (str, None):  The name_prefix all tags in this schema will accept.
 
     Returns:
@@ -38,12 +38,12 @@ def from_string(schema_string, file_type=".xml", schema_namespace=None):
         raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string",
                            filename=schema_string)
 
-    if file_type.endswith(".xml"):
+    if schema_format.endswith(".xml"):
         hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string)
-    elif file_type.endswith(".mediawiki"):
+    elif schema_format.endswith(".mediawiki"):
         hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string)
     else:
-        raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=file_type)
+        raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format)
 
     if schema_namespace:
         hed_schema.set_schema_prefix(schema_namespace=schema_namespace)
@@ -75,7 +75,7 @@ def load_schema(hed_path=None, schema_namespace=None):
 
     if is_url:
         file_as_string = schema_util.url_to_string(hed_path)
-        hed_schema = from_string(file_as_string, file_type=os.path.splitext(hed_path.lower())[1])
+        hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1])
     elif hed_path.lower().endswith(".xml"):
         hed_schema = SchemaLoaderXML.load(hed_path)
     elif hed_path.lower().endswith(".mediawiki"):
diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py
index e48333c5d..2ee73fc9e 100644
--- a/spec_tests/test_errors.py
+++ b/spec_tests/test_errors.py
@@ -223,7 +223,7 @@ def _run_single_schema_test(self, info, error_code, description,name, error_hand
             for test in tests:
                 schema_string = "\n".join(test)
                 try:
-                    loaded_schema = from_string(schema_string, file_type=".mediawiki")
+                    loaded_schema = from_string(schema_string, schema_format=".mediawiki")
                     issues = loaded_schema.check_compliance()
                 except HedFileError as e:
                     issues = e.issues
diff --git a/tests/schema/test_schema_converters.py b/tests/schema/test_schema_converters.py
index e2e1eb465..5f7c1d121 100644
--- a/tests/schema/test_schema_converters.py
+++ b/tests/schema/test_schema_converters.py
@@ -47,7 +47,7 @@ def test_schema_as_string_wiki(self):
         with open(self.wiki_file) as file:
             hed_schema_as_string = "".join([line for line in file])
 
-        string_schema = schema.from_string(hed_schema_as_string, file_type=".mediawiki")
+        string_schema = schema.from_string(hed_schema_as_string, schema_format=".mediawiki")
         self.assertEqual(string_schema, self.hed_schema_wiki)
 
     def test_wikischema2xml(self):