diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 869bc4ea6..af6249f56 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -7,6 +7,7 @@ from hed.models.column_mapper import ColumnMapper from hed.errors.exceptions import HedFileError, HedExceptions from hed.errors.error_reporter import ErrorHandler +import pandas as pd class BaseInput: @@ -364,45 +365,63 @@ def assemble(self, mapper=None): """ if mapper is None: mapper = self._mapper - import pandas as pd + transformers, need_categorical = mapper.get_transformers() if not transformers: - return None + return self._dataframe all_columns = self._dataframe if need_categorical: all_columns[need_categorical] = all_columns[need_categorical].astype('category') all_columns = all_columns.transform(transformers) - possible_column_references = [f"{column_name}" for column_name in self.columns if - column_name.lower() != "hed"] + return self._insert_columns(all_columns, list(transformers.keys())) + + @staticmethod + def _find_column_refs(df): found_column_references = [] - for column_name in all_columns: - df = all_columns[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) - u_vals = pd.Series([j for i in df for j in i], dtype=str) + for column_name in df: + df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) + u_vals = pd.Series([j for i in df_temp if isinstance(i, list) for j in i], dtype=str) u_vals = u_vals.unique() for val in u_vals: if val not in found_column_references: found_column_references.append(val) + return found_column_references + + @staticmethod + def _insert_columns(df, known_columns=None): + if known_columns is None: + known_columns = list(df.columns) + possible_column_references = [f"{column_name}" for column_name in df.columns if + isinstance(column_name, str) and column_name.lower() != "hed"] + found_column_references = BaseInput._find_column_refs(df) + + invalid_replacements = [col for col in found_column_references if col not in possible_column_references] + if invalid_replacements: + # todo: This check may be moved to validation + raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}") valid_replacements = [col for col in found_column_references if col in possible_column_references] - column_names = list(transformers.keys()) + # todo: break this into a sub function(probably) + column_names = known_columns for column_name in valid_replacements: column_names.remove(column_name) - saved_columns = all_columns[valid_replacements] + saved_columns = df[valid_replacements] for column_name in column_names: for replacing_name in valid_replacements: column_name_brackets = f"[{replacing_name}]" - all_columns[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y - in zip(all_columns[column_name], saved_columns[replacing_name])) - all_columns = all_columns[column_names] + df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y + in zip(df[column_name], saved_columns[replacing_name])) + df = df[column_names] - return all_columns + return df @staticmethod def combine_dataframe(dataframe): - """ Combines all columns in the given dataframe into a single hed string series. + """ Combines all columns in the given dataframe into a single HED string series, + skipping empty columns and columns with empty strings. Parameters: dataframe(Dataframe): The dataframe to combine @@ -410,8 +429,8 @@ def combine_dataframe(dataframe): Returns: Series: the assembled series """ - dataframe = dataframe.agg(', '.join, axis=1) - - # Potentially better ways to handle removing n/a by never inserting them to begin with. - dataframe = dataframe.replace("(, n/a|n/a,)", "", regex=True) - return dataframe + dataframe = dataframe.apply( + lambda x: ', '.join(filter(lambda e: bool(e) and e != "n/a", map(str, x))), + axis=1 + ) + return dataframe \ No newline at end of file diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index fe864b28e..75f2de5b9 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -112,7 +112,7 @@ def expand_defs(self): replacements = [] for tag in def_tags: - if not tag._expanded: + if tag.expandable and not tag.expanded: replacements.append((tag, tag._expandable)) for tag, group in replacements: diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 136b5aa73..ba1f341ac 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -41,6 +41,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): # Check the structure of the input data, if it's a BaseInput if isinstance(data, BaseInput): issues += self._validate_column_structure(data, error_handler) + # todo ian: Add more checks here for column inserters data = data.dataframe_a # Check the rows of the input data diff --git a/tests/models/test_base_file_input.py b/tests/models/test_base_file_input.py deleted file mode 100644 index 8314072bd..000000000 --- a/tests/models/test_base_file_input.py +++ /dev/null @@ -1,103 +0,0 @@ -import unittest -import os -import shutil -from hed import Sidecar -from hed import BaseInput, TabularInput -from hed.models.column_mapper import ColumnMapper -from hed.models import DefinitionDict -from hed import schema - -# TODO: Add tests for base_file_input and include correct handling of 'n/a' - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - # todo: clean up these unit tests/add more - base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) - cls.base_data_dir = base_data_dir - json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") - # cls.json_def_filename = json_def_filename - json_def_sidecar = Sidecar(json_def_filename) - events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') - cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) - - base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") - cls.base_output_folder = base_output - os.makedirs(base_output, exist_ok=True) - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.xml')) - cls.bids_root_path = bids_root_path - json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - - cls.hed_schema = schema.load_schema(schema_path) - sidecar1 = Sidecar(json_path, name='face_sub1_json') - mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) - cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, - name="face_sub1_events", mapper=mapper1, allow_blank_names=False) - cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_gathered_defs(self): - # todo: add unit tests for definitions in tsv file - defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) - expected_defs = { - 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', - 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', - 'jsonfiledef3': '(Item/JsonDef3/#)', - 'takesvaluedef': '(Age/#)', - 'valueclassdef': '(Acceleration/#)' - } - self.assertEqual(defs, expected_defs) - - # def test_missing_column_name_issue(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_column_name.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - # - # def test_expand_column_issues(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_category_key.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py new file mode 100644 index 000000000..8404be04e --- /dev/null +++ b/tests/models/test_base_input.py @@ -0,0 +1,282 @@ +import io +import unittest +import os +import shutil +from hed import Sidecar +from hed import BaseInput, TabularInput +from hed.models.column_mapper import ColumnMapper +from hed.models import DefinitionDict +from hed import schema +import pandas as pd +import numpy as np + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # todo: clean up these unit tests/add more + base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) + cls.base_data_dir = base_data_dir + json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") + # cls.json_def_filename = json_def_filename + json_def_sidecar = Sidecar(json_def_filename) + events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') + cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) + + base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") + cls.base_output_folder = base_output + os.makedirs(base_output, exist_ok=True) + + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.0.0.xml')) + cls.bids_root_path = bids_root_path + json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + + cls.hed_schema = schema.load_schema(schema_path) + sidecar1 = Sidecar(json_path, name='face_sub1_json') + mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) + cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, + name="face_sub1_events", mapper=mapper1, allow_blank_names=False) + cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_gathered_defs(self): + # todo: add unit tests for definitions in tsv file + defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) + expected_defs = { + 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', + 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', + 'jsonfiledef3': '(Item/JsonDef3/#)', + 'takesvaluedef': '(Age/#)', + 'valueclassdef': '(Acceleration/#)' + } + self.assertEqual(defs, expected_defs) + + # def test_missing_column_name_issue(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_column_name.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + # + # def test_expand_column_issues(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_category_key.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + + +class TestInsertColumns(unittest.TestCase): + + def test_insert_columns_simple(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, Action"], + "column2": ["Item"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_multiple_rows(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, Action", "Event, Action"], + "column2": ["Item", "Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Action", "Event, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + # def test_insert_columns_no_circular_reference(self): + # df = pd.DataFrame({ + # "column1": ["[column2], Event, Action"], + # "column2": ["[column1], Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + def test_insert_columns_multiple_columns(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, [column3], Action"], + "column2": ["Item"], + "column3": ["Subject"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + def test_insert_columns_invalid_column_name(self): + df = pd.DataFrame({ + "column1": ["[invalid_column], Event, Action"], + "column2": ["Item"] + }) + with self.assertRaises(ValueError): + result = BaseInput._insert_columns(df) + + def test_insert_columns_four_columns(self): + df = pd.DataFrame({ + "column1": ["[column2], Event, [column3], Action"], + "column2": ["Item"], + "column3": ["Subject"], + "column4": ["Data"] + }) + expected_df = pd.DataFrame({ + "column1": ["Item, Event, Subject, Action"], + "column4": ["Data"] + }) + result = BaseInput._insert_columns(df) + pd.testing.assert_frame_equal(result, expected_df) + + # def test_insert_columns_invalid_syntax(self): + # df = pd.DataFrame({ + # "column1": ["column2], Event, Action"], + # "column2": ["Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + # def test_insert_columns_no_self_reference(self): + # df = pd.DataFrame({ + # "column1": ["[column1], Event, Action"], + # "column2": ["Item"] + # }) + # with self.assertRaises(ValueError): + # result = BaseInput._insert_columns(df) + + +class TestCombineDataframe(unittest.TestCase): + def test_combine_dataframe_with_strings(self): + data = { + 'A': ['apple', 'banana', 'cherry'], + 'B': ['dog', 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', 'piano'] + } + df = pd.DataFrame(data) + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, dog, guitar', 'banana, elephant, harmonica', 'cherry, fox, piano']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_nan_values(self): + data = { + 'A': ['apple', np.nan, 'cherry'], + 'B': [np.nan, 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', np.nan] + } + df = pd.DataFrame(data) + # this is called on load normally + df = df.fillna("n/a") + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_empty_values(self): + data = { + 'A': ['apple', '', 'cherry'], + 'B': ['', 'elephant', 'fox'], + 'C': ['guitar', 'harmonica', ''] + } + df = pd.DataFrame(data) + + result = BaseInput.combine_dataframe(df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) + self.assertTrue(result.equals(expected)) + + def test_combine_dataframe_with_mixed_values(self): + data = { + 'A': ['apple', np.nan, 'cherry', 'n/a', ''], + 'B': [np.nan, 'elephant', 'fox', 'n/a', ''], + 'C': ['guitar', 'harmonica', np.nan, 'n/a', ''] + } + df = pd.DataFrame(data) + # this is called on load normally + df = df.fillna("n/a") + csv_buffer = io.StringIO() + df.to_csv(csv_buffer, header=False, index=False) + csv_buffer.seek(0) + + # Use the same loading function we normally use to verify n/a translates right. + loaded_df = pd.read_csv(csv_buffer, header=None) + loaded_df = loaded_df.fillna("n/a") + result = BaseInput.combine_dataframe(loaded_df) + expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) + self.assertTrue(result.equals(expected)) + + +class TestColumnRefs(unittest.TestCase): + def test_simple_column_refs(self): + data1 = { + 'A': ['[col1], [col2]', 'tag1, tag2'], + 'B': ['tag3, tag4', '[col3]'], + } + df1 = pd.DataFrame(data1) + result1 = BaseInput._find_column_refs(df1) + expected1 = ['col1', 'col2', 'col3'] + self.assertEqual(result1, expected1) + + def test_mixed_cases_and_patterns(self): + data2 = { + 'A': ['[Col1], [col2]', 'tag1, [Col3]', 'tag3, [COL4]', '[col5], [col6]'], + } + df2 = pd.DataFrame(data2) + result2 = BaseInput._find_column_refs(df2) + expected2 = ['Col1', 'col2', 'Col3', 'COL4', 'col5', 'col6'] + self.assertEqual(result2, expected2) + + def test_no_column_references(self): + data3 = { + 'A': ['tag1, tag2', 'tag3, tag4'], + 'B': ['tag5, tag6', 'tag7, tag8'], + } + df3 = pd.DataFrame(data3) + result3 = BaseInput._find_column_refs(df3) + expected3 = [] + self.assertEqual(result3, expected3) + + def test_incomplete_square_brackets(self): + data4 = { + 'A': ['[col1, [col2]', 'tag1, [Col3'], + 'B': ['tag3, [COL4', '[col5, col6]'], + } + df4 = pd.DataFrame(data4) + result4 = BaseInput._find_column_refs(df4) + expected4 = ['col2'] + self.assertEqual(result4, expected4) \ No newline at end of file diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index bc9c907b7..2f1823e9d 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -3,7 +3,7 @@ from hed import load_schema_version -from hed.models.df_util import shrink_defs, expand_defs +from hed.models.df_util import shrink_defs, expand_defs, convert_to_form from hed import DefinitionDict @@ -111,4 +111,45 @@ def test_expand_defs_series_placeholder(self): series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]) result = expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(result, expected_series) \ No newline at end of file + pd.testing.assert_series_equal(result, expected_series) + + +class TestConvertToForm(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version() + + def test_convert_to_form_short_tags(self): + df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) + expected_df = pd.DataFrame({"column1": ["Azure,See"]}) + result = convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_long_tags(self): + df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) + expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) + result = convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_series_short_tags(self): + series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) + expected_series = pd.Series(["Azure,See"]) + result = convert_to_form(series, self.schema, "short_tag") + pd.testing.assert_series_equal(result, expected_series) + + def test_convert_to_form_series_long_tags(self): + series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) + expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) + result = convert_to_form(series, self.schema, "long_tag") + pd.testing.assert_series_equal(result, expected_series) + + def test_convert_to_form_multiple_tags_short(self): + df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Azure,Nose,Acceleration/4.5 m-per-s^2"]}) + result = convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) + + def test_convert_to_form_multiple_tags_long(self): + df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) + result = convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(result, expected_df) \ No newline at end of file