hed-standard · VisLab · Mar 16, 2023 · Mar 16, 2023
diff --git a/hed/models/df_util.py b/hed/models/df_util.py
@@ -14,7 +14,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
             The path to the tabular file, or a TabularInput object representing it.
         sidecar: str or Sidecar
             The path to the sidecar file, or a Sidecar object representing it.
-        hed_schema: str or HedSchema
+        hed_schema: HedSchema
             If str, will attempt to load as a version if it doesn't have a valid extension.
         extra_def_dicts: list of DefinitionDict, optional
             Any extra DefinitionDict objects to use when parsing the HED tags.

diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py
@@ -6,13 +6,16 @@
 from hed.tools.util.data_util import separate_values
 from hed.models.hed_tag import HedTag
 from hed.models.hed_group import HedGroup
+from hed.models.df_util import get_assembled, expand_defs
 
 
-def assemble_hed(data_input, columns_included=None, expand_defs=False):
+def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False):
     """ Return assembled HED annotations in a dataframe.
 
     Parameters:
         data_input (TabularInput): The tabular input file whose HED annotations are to be assembled.
+        sidecar (Sidecar):  Sidecar whose definitions are gathered and returned.
+        schema (HedSchema):  The HED schema used to gather the definitions and to expand them if requested.
         columns_included (list or None):  A list of additional column names to include.
             If None, only the list of assembled tags is included.
         expand_defs (bool): If True, definitions are expanded when the events are assembled.
@@ -23,14 +26,22 @@ def assemble_hed(data_input, columns_included=None, expand_defs=False):
     """
 
     eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included)
-    hed_obj_list = get_assembled_strings(data_input, expand_defs=expand_defs)
-    hed_string_list = [str(hed) for hed in hed_obj_list]
+    hed_string_list = data_input.series_a
+    definitions = sidecar.get_def_dict(hed_schema=schema)
+    if expand_defs:
+        # The boolean parameter expand_defs shadows the df_util function of the same name inside this
+        # function, so the function is re-imported under a distinct local name before it is called.
+        from hed.models.df_util import expand_defs as expand_defs_in_series
+        expand_defs_in_series(hed_string_list, schema, definitions, columns=None)
+    # hed_obj_list, defs = get_assembled(data_input, sidecar, schema, extra_def_dicts=None, join_columns=True,
+    #                                    shrink_defs=False, expand_defs=True)
+    # hed_string_list = [str(hed) for hed in hed_obj_list]
     if not eligible_columns:
         df = pd.DataFrame({"HED_assembled": hed_string_list})
     else:
         df = data_input.dataframe[eligible_columns].copy(deep=True)
         df['HED_assembled'] = hed_string_list
-    definitions = data_input.get_definitions().gathered_defs
+    # definitions = data_input.get_definitions().gathered_defs
     return df, definitions
 
 

diff --git a/hed/tools/analysis/hed_context_manager.py b/hed/tools/analysis/hed_context_manager.py
@@ -35,7 +35,7 @@ def __init__(self, hed_strings, hed_schema):
 
         """
 
-        self.hed_strings = [HedString(str(hed), hed_schema=hed_schema) for hed in hed_strings]
+        self.hed_strings = hed_strings
         if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup):
             raise ValueError("ContextRequiresSchema", f"Context manager must have a valid HedSchema of HedSchemaGroup")
         self.hed_schema = hed_schema

diff --git a/hed/tools/analysis/hed_type_definitions.py b/hed/tools/analysis/hed_type_definitions.py
@@ -1,7 +1,7 @@
 """ Manages definitions associated with a type such as condition-variable. """
 
 from hed.models.hed_tag import HedTag
-from hed.models.def_mapper import DefMapper
+from hed.models.definition_dict import DefinitionDict
 
 
 class HedTypeDefinitions:
@@ -10,16 +10,18 @@ def __init__(self, definitions, hed_schema, type_tag='condition-variable'):
         """ Create a definition manager for a type of variable.
 
         Parameters:
-            definitions (dict or DefMapper): A dictionary of DefinitionEntry objects.
+            definitions (dict or DefinitionDict): A dictionary of DefinitionEntry objects.
             hed_schema (Hedschema or HedSchemaGroup): The schema used for parsing.
             type_tag (str): Lower-case HED tag string representing the type managed.
 
+        # TODO: [Refactor] - should dict be allowed for definitions.
+
         """
 
         self.type_tag = type_tag.lower()
         self.hed_schema = hed_schema
-        if isinstance(definitions, DefMapper):
-            self.definitions = definitions.gathered_defs
+        if isinstance(definitions, DefinitionDict):
+            self.definitions = definitions.defs
         elif isinstance(definitions, dict):
             self.definitions = definitions
         else:

diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py
@@ -0,0 +1,70 @@
+""" Convert the type of the specified columns of a tabular file. """
+
+from hed.tools.remodeling.operations.base_op import BaseOp
+
+
+class ConvertColumnsOp(BaseOp):
+    """ Convert the type of the specified columns of a tabular file.
+
+    Required remodeling parameters:   
+        - **column_names** (*list*):   The list of columns to convert.   
+        - **convert_to** (*str*):  Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)   
+
+    Optional remodeling parameters:   
+        - **decimal_places** (*int*):   Number decimal places to keep (for fixed only).   
+    """
+
+    PARAMS = {
+        "operation": "convert_columns",
+        "required_parameters": {
+            "column_names": list,
+            "convert_to": str
+        },
+        "optional_parameters": {
+            "decimal_places": int
+        }
+    }
+
+    def __init__(self, parameters):
+        """ Constructor for the convert columns operation.
+
+        Parameters:
+            parameters (dict): Parameter values for required and optional parameters.
+
+        Raises:  
+            KeyError    
+                - If a required parameter is missing.    
+                - If an unexpected parameter is provided.    
+
+            TypeError   
+                - If a parameter has the wrong type.   
+
+            ValueError   
+                - If convert_to is not one of the allowed values.   
+
+        """
+        super().__init__(self.PARAMS, parameters)
+        self.column_names = parameters['column_names']
+        self.convert_to = parameters['convert_to']
+        self.decimal_places = parameters.get('decimal_places', None)  # Optional; None when not provided.
+        self.allowed_types = ['str', 'int', 'float', 'fixed']  # Values accepted for convert_to.
+        if self.convert_to not in self.allowed_types:
+            raise ValueError("CannotConvertToSpecifiedType",
+                             f"The convert_to value {self.convert_to} must be one of {str(self.allowed_types)}")
+
+    def do_op(self, dispatcher, df, name, sidecar=None):
+        """ Return a copy of the dataframe; the column type conversion itself is not yet implemented.
+
+        Parameters:
+            dispatcher (Dispatcher): Manages the operation I/O.
+            df (DataFrame): The DataFrame to be remodeled.
+            name (str): Unique identifier for the dataframe -- often the original file path.
+            sidecar (Sidecar or file-like):  Only needed for HED operations.
+
+        Returns:
+            DataFrame: A copy of df, currently returned without any columns converted.
+
+        """
+
+        df_new = df.copy()
+        return df_new
diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py
@@ -1,5 +1,6 @@
 """ The valid operations for the remodeling tools. """
 
+# from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp
 from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp
 from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp
 from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp
@@ -20,6 +21,7 @@
 from hed.tools.remodeling.operations.summarize_hed_validation_op import SummarizeHedValidationOp
 
 valid_operations = {
+    # 'convert_columns': ConvertColumnsOp,
     'factor_column': FactorColumnOp,
     'factor_hed_tags': FactorHedTagsOp,
     'factor_hed_type': FactorHedTypeOp,

diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py
@@ -22,13 +22,14 @@ def setUpClass(cls):
 
         hed_schema = hedschema.load_schema(schema_path)
         cls.hed_schema = hed_schema
-        sidecar1 = Sidecar(json_path, name='face_sub1_json', hed_schema=hed_schema)
+        sidecar1 = Sidecar(json_path, name='face_sub1_json')
         cls.sidecar_path = sidecar1
-        cls.input_data = TabularInput(events_path, hed_schema=hed_schema, sidecar=sidecar1, name="face_sub1_events")
+        cls.sidecar1 = sidecar1
+        cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
         cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar")
 
     def test_assemble_hed_included_no_expand(self):
-        df1, dict1 = assemble_hed(self.input_data,
+        df1, dict1 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema,
                                   columns_included=["onset", "duration", "event_type"], expand_defs=False)
         self.assertIsInstance(df1, DataFrame, "hed_assemble should return a dataframe when columns are included")
         columns1 = list(df1.columns)
@@ -38,11 +39,11 @@ def test_assemble_hed_included_no_expand(self):
         self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags")
         self.assertEqual(first_str1.find('Def-expand'), -1,
                          "assemble_hed with no def expand does not have Def-expand tags")
-        self.assertIsInstance(dict1, dict, "hed_assemble returns a dictionary of definitions")
-        self.assertEqual(len(dict1), 17, "hed_assemble definition dictionary has the right number of elements.")
+        self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of definitions")
+        self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.")
 
     def test_assemble_hed_included_expand(self):
-        df2, dict2 = assemble_hed(self.input_data,
+        df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema,
                                   columns_included=["onset", "duration", "event_type"], expand_defs=True)
         first_str2 = df2.iloc[0]['HED_assembled']
         self.assertEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag")

diff --git a/tests/tools/analysis/test_hed_context_manager.py b/tests/tools/analysis/test_hed_context_manager.py
@@ -1,13 +1,12 @@
 import os
 import unittest
 from hed.errors.exceptions import HedFileError
-from hed.models.hed_group import HedGroup
 from hed.models.hed_string import HedString
 from hed.models.sidecar import Sidecar
 from hed.models.tabular_input import TabularInput
 from hed.schema.hed_schema_io import load_schema_version
-from hed.tools.analysis.hed_context_manager import HedContextManager, OnsetGroup
-from hed.tools.analysis.analysis_util import get_assembled_strings
+from hed.tools.analysis.hed_context_manager import HedContextManager
+from hed.models.df_util import get_assembled
 
 
 class Test(unittest.TestCase):
@@ -37,7 +36,8 @@ def setUpClass(cls):
                                                     'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
         sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json'))
         sidecar1 = Sidecar(sidecar_path, name='face_sub1_json')
-        cls.input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events")
+        cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
+        cls.sidecar1 = sidecar1
         cls.schema = schema
 
     # def test_onset_group(self):
@@ -71,13 +71,14 @@ def test_constructor(self):
         self.assertIsInstance(context, list, "The constructor event contexts should be a list")
         self.assertIsInstance(context[1], HedString, "The constructor event contexts has a correct element")
 
-    def test_constructor(self):
+    def test_constructor1(self):
         with self.assertRaises(ValueError) as cont:
             HedContextManager(self.test_strings1, None)
         self.assertEqual(cont.exception.args[0], "ContextRequiresSchema")
 
     def test_iter(self):
-        hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False)
+        hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None,
+                                       join_columns=True, shrink_defs=True, expand_defs=False)
         manager1 = HedContextManager(hed_strings, self.schema)
         i = 0
         for hed, context in manager1.iter_context():
@@ -86,7 +87,8 @@ def test_iter(self):
             i = i + 1
 
     def test_constructor_from_assembled(self):
-        hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False)
+        hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None,
+                                       join_columns=True, shrink_defs=True, expand_defs=False)
         manager1 = HedContextManager(hed_strings, self.schema)
         self.assertEqual(len(manager1.hed_strings), 200,
                          "The constructor for assembled strings has expected # of strings")

diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py
@@ -24,7 +24,7 @@ def setUpClass(cls):
         schema = hedschema.load_schema(schema_path)
         cls.hed_schema = schema
         sidecar1 = Sidecar(json_path, name='face_sub1_json')
-        input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events")
+        input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
         input_df, def_dict = assemble_hed(input_data, expand_defs=False)
         cls.input_df = input_df
         cls.def_dict = def_dict

diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py
@@ -0,0 +1,50 @@
+import pandas as pd
+import numpy as np
+import unittest
+from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp
+from hed.tools.remodeling.dispatcher import Dispatcher
+
+
+class Test(unittest.TestCase):
+    """
+
+    TODO: Test do_op once the column conversion is implemented.
+
+    """
+    @classmethod
+    def setUpClass(cls):
+        cls.sample_data = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female'],
+                           [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female'],
+                           [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female'],
+                           [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female'],
+                           [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male'],
+                           [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male']]
+        cls.factored = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female', 0, 0],
+                        [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female', 0, 1],
+                        [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female', 0, 0],
+                        [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female', 1, 0],
+                        [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male', 0, 1],
+                        [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male', 0, 0]]
+        cls.sample_columns = ['onset', 'duration', 'trial_type', 'stop_signal_delay', 'response_time',
+                              'response_accuracy', 'response_hand', 'sex']
+        cls.default_factor_columns = ["trial_type.succesful_stop", "trial_type.unsuccesful_stop"]
+
+    def setUp(self):
+        self.base_parameters = {
+            "column_names": ["onset", "duration", "response_time"],
+            "convert_to": "int"
+        }
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_constructor_bad_convert_to(self):
+        self.base_parameters["convert_to"] = "blech"
+        with self.assertRaises(ValueError) as context:
+            ConvertColumnsOp(self.base_parameters)
+        self.assertEqual(context.exception.args[0], "CannotConvertToSpecifiedType")
+
+
+if __name__ == '__main__':
+    unittest.main()