From c8db8ba8f511a98078a203f17b7a622c7e3c7170 Mon Sep 17 00:00:00 2001
From: VisLab <1189050+VisLab@users.noreply.github.com>
Date: Sun, 19 Mar 2023 07:36:58 -0500
Subject: [PATCH] Revert "Add more unit tests.  better nan and empty column
 handling"

---
 hed/models/base_input.py               |  58 ++----
 hed/validator/spreadsheet_validator.py |   1 -
 tests/models/test_base_file_input.py   | 103 +++++++++
 tests/models/test_base_input.py        | 276 -------------------------
 tests/models/test_df_util.py           |  45 +---
 5 files changed, 126 insertions(+), 357 deletions(-)
 create mode 100644 tests/models/test_base_file_input.py
 delete mode 100644 tests/models/test_base_input.py

diff --git a/hed/models/base_input.py b/hed/models/base_input.py
index f50ea5e4c..869bc4ea6 100644
--- a/hed/models/base_input.py
+++ b/hed/models/base_input.py
@@ -7,7 +7,6 @@
 from hed.models.column_mapper import ColumnMapper
 from hed.errors.exceptions import HedFileError, HedExceptions
 from hed.errors.error_reporter import ErrorHandler
-import pandas as pd
 
 
 class BaseInput:
@@ -67,7 +66,10 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T
         elif not file:
             raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file)
         elif input_type in self.TEXT_EXTENSION:
-            self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, dtype=str)
+            self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header,
+                                              dtype=str, keep_default_na=True, na_values=None)
+            # Replace NaN values with the known placeholder string "n/a"
+            self._dataframe = self._dataframe.fillna("n/a")
         elif input_type in self.EXCEL_EXTENSION:
             self._loaded_workbook = openpyxl.load_workbook(file)
             loaded_worksheet = self.get_worksheet(self._worksheet_name)
@@ -362,7 +364,7 @@ def assemble(self, mapper=None):
         """
         if mapper is None:
             mapper = self._mapper
-
+        import pandas as pd
         transformers, need_categorical = mapper.get_transformers()
         if not transformers:
             return None
@@ -372,53 +374,35 @@ def assemble(self, mapper=None):
 
         all_columns = all_columns.transform(transformers)
 
-        return self._insert_columns(all_columns, list(transformers.keys()))
-
-    @staticmethod
-    def _find_column_refs(df):
+        possible_column_references = [f"{column_name}" for column_name in self.columns if
+                                      column_name.lower() != "hed"]
         found_column_references = []
-        for column_name in df:
-            df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE)
-            u_vals = pd.Series([j for i in df_temp for j in i], dtype=str)
+        for column_name in all_columns:
+            df = all_columns[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE)
+            u_vals = pd.Series([j for i in df for j in i], dtype=str)
             u_vals = u_vals.unique()
             for val in u_vals:
                 if val not in found_column_references:
                     found_column_references.append(val)
 
-        return found_column_references
-
-    @staticmethod
-    def _insert_columns(df, known_columns=None):
-        if known_columns is None:
-            known_columns = list(df.columns)
-        possible_column_references = [f"{column_name}" for column_name in df.columns if
-                                      column_name.lower() != "hed"]
-        found_column_references = BaseInput._find_column_refs(df)
-
-        invalid_replacements = [col for col in found_column_references if col not in possible_column_references]
-        if invalid_replacements:
-            # todo: This check may be moved to validation
-            raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}")
         valid_replacements = [col for col in found_column_references if col in possible_column_references]
 
-        # todo: break this into a sub function(probably)
-        column_names = known_columns
+        column_names = list(transformers.keys())
         for column_name in valid_replacements:
             column_names.remove(column_name)
-        saved_columns = df[valid_replacements]
+        saved_columns = all_columns[valid_replacements]
         for column_name in column_names:
             for replacing_name in valid_replacements:
                 column_name_brackets = f"[{replacing_name}]"
-                df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y
-                                            in zip(df[column_name], saved_columns[replacing_name]))
-        df = df[column_names]
+                all_columns[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y
+                                                     in zip(all_columns[column_name], saved_columns[replacing_name]))
+        all_columns = all_columns[column_names]
 
-        return df
+        return all_columns
 
     @staticmethod
     def combine_dataframe(dataframe):
-        """ Combines all columns in the given dataframe into a single HED string series,
-            skipping empty columns and columns with empty strings.
+        """ Combines all columns in the given dataframe into a single hed string series.
 
         Parameters:
             dataframe(Dataframe): The dataframe to combine
@@ -426,8 +410,8 @@ def combine_dataframe(dataframe):
         Returns:
             Series: the assembled series
         """
-        dataframe = dataframe.agg(
-            lambda x: ', '.join(filter(lambda e: pd.notna(e) and e != "", x)), axis=1
-        )
+        dataframe = dataframe.agg(', '.join, axis=1)
 
-        return dataframe
\ No newline at end of file
+        # There may be better ways to remove n/a, e.g. by never inserting them to begin with.
+        dataframe = dataframe.replace("(, n/a|n/a,)", "", regex=True)
+        return dataframe
diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py
index ba1f341ac..136b5aa73 100644
--- a/hed/validator/spreadsheet_validator.py
+++ b/hed/validator/spreadsheet_validator.py
@@ -41,7 +41,6 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         # Check the structure of the input data, if it's a BaseInput
         if isinstance(data, BaseInput):
             issues += self._validate_column_structure(data, error_handler)
-            # todo ian: Add more checks here for column inserters
             data = data.dataframe_a
 
         # Check the rows of the input data
diff --git a/tests/models/test_base_file_input.py b/tests/models/test_base_file_input.py
new file mode 100644
index 000000000..8314072bd
--- /dev/null
+++ b/tests/models/test_base_file_input.py
@@ -0,0 +1,103 @@
+import unittest
+import os
+import shutil
+from hed import Sidecar
+from hed import BaseInput, TabularInput
+from hed.models.column_mapper import ColumnMapper
+from hed.models import DefinitionDict
+from hed import schema
+
+# TODO: Add tests for base_file_input and include correct handling of 'n/a'
+
+
+class Test(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # todo: clean up these unit tests/add more
+        base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/'))
+        cls.base_data_dir = base_data_dir
+        json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json")
+        # cls.json_def_filename = json_def_filename
+        json_def_sidecar = Sidecar(json_def_filename)
+        events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv')
+        cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar)
+
+        base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/")
+        cls.base_output_folder = base_output
+        os.makedirs(base_output, exist_ok=True)
+
+        bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                                       '../data/bids_tests/eeg_ds003645s_hed'))
+        schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                                    '../data/schema_tests/HED8.0.0.xml'))
+        cls.bids_root_path = bids_root_path
+        json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json'))
+        events_path = os.path.realpath(os.path.join(bids_root_path,
+                                                    'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
+
+        cls.hed_schema = schema.load_schema(schema_path)
+        sidecar1 = Sidecar(json_path, name='face_sub1_json')
+        mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False)
+        cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True,
+                                    name="face_sub1_events", mapper=mapper1, allow_blank_names=False)
+        cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events")
+
+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree(cls.base_output_folder)
+
+    def test_gathered_defs(self):
+        # todo: add unit tests for definitions in tsv file
+        defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema))
+        expected_defs = {
+            'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)',
+            'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)',
+            'jsonfiledef3': '(Item/JsonDef3/#)',
+            'takesvaluedef': '(Age/#)',
+            'valueclassdef': '(Acceleration/#)'
+        }
+        self.assertEqual(defs, expected_defs)
+
+    # def test_missing_column_name_issue(self):
+    #     schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+    #                                '../data/validator_tests/bids_schema.mediawiki')
+    #     events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+    #                                '../data/validator_tests/bids_events_bad_column_name.tsv')
+    #
+    #     hed_schema = schema.load_schema(schema_path)
+    #     json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+    #                                              "../data/validator_tests/bids_events.json")
+    #     validator = HedValidator(hed_schema=hed_schema)
+    #     sidecar = Sidecar(json_path)
+    #     issues = sidecar.validate_entries(validator)
+    #     self.assertEqual(len(issues), 0)
+    #     input_file = TabularInput(events_path, sidecars=sidecar)
+    #
+    #     validation_issues = input_file.validate_sidecar(validator)
+    #     self.assertEqual(len(validation_issues), 0)
+    #     validation_issues = input_file.validate_file(validator, check_for_warnings=True)
+    #     self.assertEqual(len(validation_issues), 1)
+    #
+    # def test_expand_column_issues(self):
+    #     schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+    #                                '../data/validator_tests/bids_schema.mediawiki')
+    #     events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+    #                                '../data/validator_tests/bids_events_bad_category_key.tsv')
+    #
+    #     hed_schema = schema.load_schema(schema_path)
+    #     json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+    #                                              "../data/validator_tests/bids_events.json")
+    #     validator = HedValidator(hed_schema=hed_schema)
+    #     sidecar = Sidecar(json_path)
+    #     issues = sidecar.validate_entries(validator)
+    #     self.assertEqual(len(issues), 0)
+    #     input_file = TabularInput(events_path, sidecars=sidecar)
+    #
+    #     validation_issues = input_file.validate_sidecar(validator)
+    #     self.assertEqual(len(validation_issues), 0)
+    #     validation_issues = input_file.validate_file(validator, check_for_warnings=True)
+    #     self.assertEqual(len(validation_issues), 1)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py
deleted file mode 100644
index 392599f78..000000000
--- a/tests/models/test_base_input.py
+++ /dev/null
@@ -1,276 +0,0 @@
-import io
-import unittest
-import os
-import shutil
-from hed import Sidecar
-from hed import BaseInput, TabularInput
-from hed.models.column_mapper import ColumnMapper
-from hed.models import DefinitionDict
-from hed import schema
-import pandas as pd
-import numpy as np
-
-
-class Test(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        # todo: clean up these unit tests/add more
-        base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/'))
-        cls.base_data_dir = base_data_dir
-        json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json")
-        # cls.json_def_filename = json_def_filename
-        json_def_sidecar = Sidecar(json_def_filename)
-        events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv')
-        cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar)
-
-        base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/")
-        cls.base_output_folder = base_output
-        os.makedirs(base_output, exist_ok=True)
-
-        bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
-                                                       '../data/bids_tests/eeg_ds003645s_hed'))
-        schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
-                                                    '../data/schema_tests/HED8.0.0.xml'))
-        cls.bids_root_path = bids_root_path
-        json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json'))
-        events_path = os.path.realpath(os.path.join(bids_root_path,
-                                                    'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
-
-        cls.hed_schema = schema.load_schema(schema_path)
-        sidecar1 = Sidecar(json_path, name='face_sub1_json')
-        mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False)
-        cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True,
-                                    name="face_sub1_events", mapper=mapper1, allow_blank_names=False)
-        cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events")
-
-    @classmethod
-    def tearDownClass(cls):
-        shutil.rmtree(cls.base_output_folder)
-
-    def test_gathered_defs(self):
-        # todo: add unit tests for definitions in tsv file
-        defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema))
-        expected_defs = {
-            'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)',
-            'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)',
-            'jsonfiledef3': '(Item/JsonDef3/#)',
-            'takesvaluedef': '(Age/#)',
-            'valueclassdef': '(Acceleration/#)'
-        }
-        self.assertEqual(defs, expected_defs)
-
-    # def test_missing_column_name_issue(self):
-    #     schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-    #                                '../data/validator_tests/bids_schema.mediawiki')
-    #     events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-    #                                '../data/validator_tests/bids_events_bad_column_name.tsv')
-    #
-    #     hed_schema = schema.load_schema(schema_path)
-    #     json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-    #                                              "../data/validator_tests/bids_events.json")
-    #     validator = HedValidator(hed_schema=hed_schema)
-    #     sidecar = Sidecar(json_path)
-    #     issues = sidecar.validate_entries(validator)
-    #     self.assertEqual(len(issues), 0)
-    #     input_file = TabularInput(events_path, sidecars=sidecar)
-    #
-    #     validation_issues = input_file.validate_sidecar(validator)
-    #     self.assertEqual(len(validation_issues), 0)
-    #     validation_issues = input_file.validate_file(validator, check_for_warnings=True)
-    #     self.assertEqual(len(validation_issues), 1)
-    #
-    # def test_expand_column_issues(self):
-    #     schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-    #                                '../data/validator_tests/bids_schema.mediawiki')
-    #     events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-    #                                '../data/validator_tests/bids_events_bad_category_key.tsv')
-    #
-    #     hed_schema = schema.load_schema(schema_path)
-    #     json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
-    #                                              "../data/validator_tests/bids_events.json")
-    #     validator = HedValidator(hed_schema=hed_schema)
-    #     sidecar = Sidecar(json_path)
-    #     issues = sidecar.validate_entries(validator)
-    #     self.assertEqual(len(issues), 0)
-    #     input_file = TabularInput(events_path, sidecars=sidecar)
-    #
-    #     validation_issues = input_file.validate_sidecar(validator)
-    #     self.assertEqual(len(validation_issues), 0)
-    #     validation_issues = input_file.validate_file(validator, check_for_warnings=True)
-    #     self.assertEqual(len(validation_issues), 1)
-
-
-class TestInsertColumns(unittest.TestCase):
-
-    def test_insert_columns_simple(self):
-        df = pd.DataFrame({
-            "column1": ["[column2], Event, Action"],
-            "column2": ["Item"]
-        })
-        expected_df = pd.DataFrame({
-            "column1": ["Item, Event, Action"]
-        })
-        result = BaseInput._insert_columns(df)
-        pd.testing.assert_frame_equal(result, expected_df)
-
-    def test_insert_columns_multiple_rows(self):
-        df = pd.DataFrame({
-            "column1": ["[column2], Event, Action", "Event, Action"],
-            "column2": ["Item", "Subject"]
-        })
-        expected_df = pd.DataFrame({
-            "column1": ["Item, Event, Action", "Event, Action"]
-        })
-        result = BaseInput._insert_columns(df)
-        pd.testing.assert_frame_equal(result, expected_df)
-
-    # def test_insert_columns_no_circular_reference(self):
-    #     df = pd.DataFrame({
-    #         "column1": ["[column2], Event, Action"],
-    #         "column2": ["[column1], Item"]
-    #     })
-    #     with self.assertRaises(ValueError):
-    #         result = BaseInput._insert_columns(df)
-
-    def test_insert_columns_multiple_columns(self):
-        df = pd.DataFrame({
-            "column1": ["[column2], Event, [column3], Action"],
-            "column2": ["Item"],
-            "column3": ["Subject"]
-        })
-        expected_df = pd.DataFrame({
-            "column1": ["Item, Event, Subject, Action"]
-        })
-        result = BaseInput._insert_columns(df)
-        pd.testing.assert_frame_equal(result, expected_df)
-
-    def test_insert_columns_invalid_column_name(self):
-        df = pd.DataFrame({
-            "column1": ["[invalid_column], Event, Action"],
-            "column2": ["Item"]
-        })
-        with self.assertRaises(ValueError):
-            result = BaseInput._insert_columns(df)
-
-    def test_insert_columns_four_columns(self):
-        df = pd.DataFrame({
-            "column1": ["[column2], Event, [column3], Action"],
-            "column2": ["Item"],
-            "column3": ["Subject"],
-            "column4": ["Data"]
-        })
-        expected_df = pd.DataFrame({
-            "column1": ["Item, Event, Subject, Action"],
-            "column4": ["Data"]
-        })
-        result = BaseInput._insert_columns(df)
-        pd.testing.assert_frame_equal(result, expected_df)
-
-    # def test_insert_columns_invalid_syntax(self):
-    #     df = pd.DataFrame({
-    #         "column1": ["column2], Event, Action"],
-    #         "column2": ["Item"]
-    #     })
-    #     with self.assertRaises(ValueError):
-    #         result = BaseInput._insert_columns(df)
-
-    # def test_insert_columns_no_self_reference(self):
-    #     df = pd.DataFrame({
-    #         "column1": ["[column1], Event, Action"],
-    #         "column2": ["Item"]
-    #     })
-    #     with self.assertRaises(ValueError):
-    #         result = BaseInput._insert_columns(df)
-
-
-class TestCombineDataframe(unittest.TestCase):
-    def test_combine_dataframe_with_strings(self):
-        data = {
-            'A': ['apple', 'banana', 'cherry'],
-            'B': ['dog', 'elephant', 'fox'],
-            'C': ['guitar', 'harmonica', 'piano']
-        }
-        df = pd.DataFrame(data)
-        result = BaseInput.combine_dataframe(df)
-        expected = pd.Series(['apple, dog, guitar', 'banana, elephant, harmonica', 'cherry, fox, piano'])
-        self.assertTrue(result.equals(expected))
-
-    def test_combine_dataframe_with_nan_values(self):
-        data = {
-            'A': ['apple', np.nan, 'cherry'],
-            'B': [np.nan, 'elephant', 'fox'],
-            'C': ['guitar', 'harmonica', np.nan]
-        }
-        df = pd.DataFrame(data)
-        result = BaseInput.combine_dataframe(df)
-        expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox'])
-        self.assertTrue(result.equals(expected))
-
-    def test_combine_dataframe_with_empty_values(self):
-        data = {
-            'A': ['apple', '', 'cherry'],
-            'B': ['', 'elephant', 'fox'],
-            'C': ['guitar', 'harmonica', '']
-        }
-        df = pd.DataFrame(data)
-        result = BaseInput.combine_dataframe(df)
-        expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox'])
-        self.assertTrue(result.equals(expected))
-
-    def test_combine_dataframe_with_mixed_values(self):
-        data = {
-            'A': ['apple', np.nan, 'cherry', 'n/a', ''],
-            'B': [np.nan, 'elephant', 'fox', 'n/a', ''],
-            'C': ['guitar', 'harmonica', np.nan, 'n/a', '']
-        }
-        df = pd.DataFrame(data)
-        csv_buffer = io.StringIO()
-        df.to_csv(csv_buffer, header=False, index=False)
-        csv_buffer.seek(0)
-
-        # Use the same loading function we normally use to verify n/a translates right.
-        loaded_df = pd.read_csv(csv_buffer, header=None)
-        result = BaseInput.combine_dataframe(loaded_df)
-        expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', ''])
-        self.assertTrue(result.equals(expected))
-
-
-class TestColumnRefs(unittest.TestCase):
-    def test_simple_column_refs(self):
-        data1 = {
-            'A': ['[col1], [col2]', 'tag1, tag2'],
-            'B': ['tag3, tag4', '[col3]'],
-        }
-        df1 = pd.DataFrame(data1)
-        result1 = BaseInput._find_column_refs(df1)
-        expected1 = ['col1', 'col2', 'col3']
-        self.assertEqual(result1, expected1)
-
-    def test_mixed_cases_and_patterns(self):
-        data2 = {
-            'A': ['[Col1], [col2]', 'tag1, [Col3]', 'tag3, [COL4]', '[col5], [col6]'],
-        }
-        df2 = pd.DataFrame(data2)
-        result2 = BaseInput._find_column_refs(df2)
-        expected2 = ['Col1', 'col2', 'Col3', 'COL4', 'col5', 'col6']
-        self.assertEqual(result2, expected2)
-
-    def test_no_column_references(self):
-        data3 = {
-            'A': ['tag1, tag2', 'tag3, tag4'],
-            'B': ['tag5, tag6', 'tag7, tag8'],
-        }
-        df3 = pd.DataFrame(data3)
-        result3 = BaseInput._find_column_refs(df3)
-        expected3 = []
-        self.assertEqual(result3, expected3)
-
-    def test_incomplete_square_brackets(self):
-        data4 = {
-            'A': ['[col1, [col2]', 'tag1, [Col3'],
-            'B': ['tag3, [COL4', '[col5, col6]'],
-        }
-        df4 = pd.DataFrame(data4)
-        result4 = BaseInput._find_column_refs(df4)
-        expected4 = ['col2']
-        self.assertEqual(result4, expected4)
\ No newline at end of file
diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py
index e10e2a4a3..bc9c907b7 100644
--- a/tests/models/test_df_util.py
+++ b/tests/models/test_df_util.py
@@ -3,7 +3,7 @@
 
 
 from hed import load_schema_version
-from hed.models.df_util import shrink_defs, expand_defs, convert_to_form
+from hed.models.df_util import shrink_defs, expand_defs
 from hed import DefinitionDict
 
 
@@ -111,45 +111,4 @@ def test_expand_defs_series_placeholder(self):
         series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"])
         expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"])
         result = expand_defs(series, self.schema, self.def_dict, None)
-        pd.testing.assert_series_equal(result, expected_series)
-
-
-class TestConvertToForm(unittest.TestCase):
-    def setUp(self):
-        self.schema = load_schema_version()
-
-    def test_convert_to_form_short_tags(self):
-        df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]})
-        expected_df = pd.DataFrame({"column1": ["Azure,See"]})
-        result = convert_to_form(df, self.schema, "short_tag", ['column1'])
-        pd.testing.assert_frame_equal(result, expected_df)
-
-    def test_convert_to_form_long_tags(self):
-        df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]})
-        expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]})
-        result = convert_to_form(df, self.schema, "long_tag", ['column1'])
-        pd.testing.assert_frame_equal(result, expected_df)
-
-    def test_convert_to_form_series_short_tags(self):
-        series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"])
-        expected_series = pd.Series(["Azure,See"])
-        result = convert_to_form(series, self.schema, "short_tag")
-        pd.testing.assert_series_equal(result, expected_series)
-
-    def test_convert_to_form_series_long_tags(self):
-        series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"])
-        expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"])
-        result = convert_to_form(series, self.schema, "long_tag")
-        pd.testing.assert_series_equal(result, expected_series)
-
-    def test_convert_to_form_multiple_tags_short(self):
-        df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]})
-        expected_df = pd.DataFrame({"column1": ["Azure,Nose,4.5 m-per-s^2"]})
-        result = convert_to_form(df, self.schema, "short_tag", ['column1'])
-        pd.testing.assert_frame_equal(result, expected_df)
-
-    def test_convert_to_form_multiple_tags_long(self):
-        df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]})
-        expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]})
-        result = convert_to_form(df, self.schema, "long_tag", ['column1'])
-        pd.testing.assert_frame_equal(result, expected_df)
\ No newline at end of file
+        pd.testing.assert_series_equal(result, expected_series)
\ No newline at end of file