From c8db8ba8f511a98078a203f17b7a622c7e3c7170 Mon Sep 17 00:00:00 2001 From: VisLab <1189050+VisLab@users.noreply.github.com> Date: Sun, 19 Mar 2023 07:36:58 -0500 Subject: [PATCH] Revert "Add more unit tests. better nan and empty column handling" --- hed/models/base_input.py | 58 ++---- hed/validator/spreadsheet_validator.py | 1 - tests/models/test_base_file_input.py | 103 +++++++++ tests/models/test_base_input.py | 276 ------------------------- tests/models/test_df_util.py | 45 +--- 5 files changed, 126 insertions(+), 357 deletions(-) create mode 100644 tests/models/test_base_file_input.py delete mode 100644 tests/models/test_base_input.py diff --git a/hed/models/base_input.py b/hed/models/base_input.py index f50ea5e4c..869bc4ea6 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -7,7 +7,6 @@ from hed.models.column_mapper import ColumnMapper from hed.errors.exceptions import HedFileError, HedExceptions from hed.errors.error_reporter import ErrorHandler -import pandas as pd class BaseInput: @@ -67,7 +66,10 @@ def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=T elif not file: raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file) elif input_type in self.TEXT_EXTENSION: - self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, dtype=str) + self._dataframe = pandas.read_csv(file, delimiter='\t', header=pandas_header, + dtype=str, keep_default_na=True, na_values=None) + # Convert nan values to a known value + self._dataframe = self._dataframe.fillna("n/a") elif input_type in self.EXCEL_EXTENSION: self._loaded_workbook = openpyxl.load_workbook(file) loaded_worksheet = self.get_worksheet(self._worksheet_name) @@ -362,7 +364,7 @@ def assemble(self, mapper=None): """ if mapper is None: mapper = self._mapper - + import pandas as pd transformers, need_categorical = mapper.get_transformers() if not transformers: return None @@ -372,53 +374,35 @@ def assemble(self, mapper=None): all_columns = all_columns.transform(transformers) - return self._insert_columns(all_columns, list(transformers.keys())) - - @staticmethod - def _find_column_refs(df): + possible_column_references = [f"{column_name}" for column_name in self.columns if + column_name.lower() != "hed"] found_column_references = [] - for column_name in df: - df_temp = df[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) - u_vals = pd.Series([j for i in df_temp for j in i], dtype=str) + for column_name in all_columns: + df = all_columns[column_name].str.findall("\[([a-z_\-0-9]+)\]", re.IGNORECASE) + u_vals = pd.Series([j for i in df for j in i], dtype=str) u_vals = u_vals.unique() for val in u_vals: if val not in found_column_references: found_column_references.append(val) - return found_column_references - - @staticmethod - def _insert_columns(df, known_columns=None): - if known_columns is None: - known_columns = list(df.columns) - possible_column_references = [f"{column_name}" for column_name in df.columns if - column_name.lower() != "hed"] - found_column_references = BaseInput._find_column_refs(df) - - invalid_replacements = [col for col in found_column_references if col not in possible_column_references] - if invalid_replacements: - # todo: This check may be moved to validation - raise ValueError(f"Bad column references found(columns do not exist): {invalid_replacements}") valid_replacements = [col for col in found_column_references if col in possible_column_references] - # todo: break this into a sub function(probably) - column_names = known_columns + column_names = list(transformers.keys()) for column_name in valid_replacements: column_names.remove(column_name) - saved_columns = df[valid_replacements] + saved_columns = all_columns[valid_replacements] for column_name in column_names: for replacing_name in valid_replacements: column_name_brackets = f"[{replacing_name}]" - df[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y - in zip(df[column_name], saved_columns[replacing_name])) - df = df[column_names] + all_columns[column_name] = pd.Series(x.replace(column_name_brackets, y) for x, y + in zip(all_columns[column_name], saved_columns[replacing_name])) + all_columns = all_columns[column_names] - return df + return all_columns @staticmethod def combine_dataframe(dataframe): - """ Combines all columns in the given dataframe into a single HED string series, - skipping empty columns and columns with empty strings. + """ Combines all columns in the given dataframe into a single hed string series. Parameters: dataframe(Dataframe): The dataframe to combine @@ -426,8 +410,8 @@ def combine_dataframe(dataframe): Returns: Series: the assembled series """ - dataframe = dataframe.agg( - lambda x: ', '.join(filter(lambda e: pd.notna(e) and e != "", x)), axis=1 - ) + dataframe = dataframe.agg(', '.join, axis=1) - return dataframe \ No newline at end of file + # Potentially better ways to handle removing n/a by never inserting them to begin with. + dataframe = dataframe.replace("(, n/a|n/a,)", "", regex=True) + return dataframe diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index ba1f341ac..136b5aa73 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -41,7 +41,6 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None): # Check the structure of the input data, if it's a BaseInput if isinstance(data, BaseInput): issues += self._validate_column_structure(data, error_handler) - # todo ian: Add more checks here for column inserters data = data.dataframe_a # Check the rows of the input data diff --git a/tests/models/test_base_file_input.py b/tests/models/test_base_file_input.py new file mode 100644 index 000000000..8314072bd --- /dev/null +++ b/tests/models/test_base_file_input.py @@ -0,0 +1,103 @@ +import unittest +import os +import shutil +from hed import Sidecar +from hed import BaseInput, TabularInput +from hed.models.column_mapper import ColumnMapper +from hed.models import DefinitionDict +from hed import schema + +# TODO: Add tests for base_file_input and include correct handling of 'n/a' + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # todo: clean up these unit tests/add more + base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) + cls.base_data_dir = base_data_dir + json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") + # cls.json_def_filename = json_def_filename + json_def_sidecar = Sidecar(json_def_filename) + events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') + cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) + + base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") + cls.base_output_folder = base_output + os.makedirs(base_output, exist_ok=True) + + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.0.0.xml')) + cls.bids_root_path = bids_root_path + json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + events_path = os.path.realpath(os.path.join(bids_root_path, + 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + + cls.hed_schema = schema.load_schema(schema_path) + sidecar1 = Sidecar(json_path, name='face_sub1_json') + mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) + cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, + name="face_sub1_events", mapper=mapper1, allow_blank_names=False) + cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.base_output_folder) + + def test_gathered_defs(self): + # todo: add unit tests for definitions in tsv file + defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) + expected_defs = { + 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', + 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', + 'jsonfiledef3': '(Item/JsonDef3/#)', + 'takesvaluedef': '(Age/#)', + 'valueclassdef': '(Acceleration/#)' + } + self.assertEqual(defs, expected_defs) + + # def test_missing_column_name_issue(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_column_name.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + # + # def test_expand_column_issues(self): + # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_schema.mediawiki') + # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # '../data/validator_tests/bids_events_bad_category_key.tsv') + # + # hed_schema = schema.load_schema(schema_path) + # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), + # "../data/validator_tests/bids_events.json") + # validator = HedValidator(hed_schema=hed_schema) + # sidecar = Sidecar(json_path) + # issues = sidecar.validate_entries(validator) + # self.assertEqual(len(issues), 0) + # input_file = TabularInput(events_path, sidecars=sidecar) + # + # validation_issues = input_file.validate_sidecar(validator) + # self.assertEqual(len(validation_issues), 0) + # validation_issues = input_file.validate_file(validator, check_for_warnings=True) + # self.assertEqual(len(validation_issues), 1) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py deleted file mode 100644 index 392599f78..000000000 --- a/tests/models/test_base_input.py +++ /dev/null @@ -1,276 +0,0 @@ -import io -import unittest -import os -import shutil -from hed import Sidecar -from hed import BaseInput, TabularInput -from hed.models.column_mapper import ColumnMapper -from hed.models import DefinitionDict -from hed import schema -import pandas as pd -import numpy as np - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - # todo: clean up these unit tests/add more - base_data_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../data/')) - cls.base_data_dir = base_data_dir - json_def_filename = os.path.join(base_data_dir, "sidecar_tests/both_types_events_with_defs.json") - # cls.json_def_filename = json_def_filename - json_def_sidecar = Sidecar(json_def_filename) - events_path = os.path.join(base_data_dir, '../data/validator_tests/bids_events_no_index.tsv') - cls.tabular_file = TabularInput(events_path, sidecar=json_def_sidecar) - - base_output = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/tests_output/") - cls.base_output_folder = base_output - os.makedirs(base_output, exist_ok=True) - - bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/bids_tests/eeg_ds003645s_hed')) - schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.xml')) - cls.bids_root_path = bids_root_path - json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) - events_path = os.path.realpath(os.path.join(bids_root_path, - 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) - - cls.hed_schema = schema.load_schema(schema_path) - sidecar1 = Sidecar(json_path, name='face_sub1_json') - mapper1 = ColumnMapper(sidecar=sidecar1, optional_tag_columns=['HED'], warn_on_missing_column=False) - cls.input_data1 = BaseInput(events_path, file_type='.tsv', has_column_names=True, - name="face_sub1_events", mapper=mapper1, allow_blank_names=False) - cls.input_data2 = BaseInput(events_path, file_type='.tsv', has_column_names=True, name="face_sub2_events") - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.base_output_folder) - - def test_gathered_defs(self): - # todo: add unit tests for definitions in tsv file - defs = DefinitionDict.get_as_strings(self.tabular_file._sidecar.extract_definitions(hed_schema=self.hed_schema)) - expected_defs = { - 'jsonfiledef': '(Item/JsonDef1/#,Item/JsonDef1)', - 'jsonfiledef2': '(Item/JsonDef2/#,Item/JsonDef2)', - 'jsonfiledef3': '(Item/JsonDef3/#)', - 'takesvaluedef': '(Age/#)', - 'valueclassdef': '(Acceleration/#)' - } - self.assertEqual(defs, expected_defs) - - # def test_missing_column_name_issue(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_column_name.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - # - # def test_expand_column_issues(self): - # schema_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_schema.mediawiki') - # events_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # '../data/validator_tests/bids_events_bad_category_key.tsv') - # - # hed_schema = schema.load_schema(schema_path) - # json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), - # "../data/validator_tests/bids_events.json") - # validator = HedValidator(hed_schema=hed_schema) - # sidecar = Sidecar(json_path) - # issues = sidecar.validate_entries(validator) - # self.assertEqual(len(issues), 0) - # input_file = TabularInput(events_path, sidecars=sidecar) - # - # validation_issues = input_file.validate_sidecar(validator) - # self.assertEqual(len(validation_issues), 0) - # validation_issues = input_file.validate_file(validator, check_for_warnings=True) - # self.assertEqual(len(validation_issues), 1) - - -class TestInsertColumns(unittest.TestCase): - - def test_insert_columns_simple(self): - df = pd.DataFrame({ - "column1": ["[column2], Event, Action"], - "column2": ["Item"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Action"] - }) - result = BaseInput._insert_columns(df) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_multiple_rows(self): - df = pd.DataFrame({ - "column1": ["[column2], Event, Action", "Event, Action"], - "column2": ["Item", "Subject"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Action", "Event, Action"] - }) - result = BaseInput._insert_columns(df) - pd.testing.assert_frame_equal(result, expected_df) - - # def test_insert_columns_no_circular_reference(self): - # df = pd.DataFrame({ - # "column1": ["[column2], Event, Action"], - # "column2": ["[column1], Item"] - # }) - # with self.assertRaises(ValueError): - # result = BaseInput._insert_columns(df) - - def test_insert_columns_multiple_columns(self): - df = pd.DataFrame({ - "column1": ["[column2], Event, [column3], Action"], - "column2": ["Item"], - "column3": ["Subject"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Subject, Action"] - }) - result = BaseInput._insert_columns(df) - pd.testing.assert_frame_equal(result, expected_df) - - def test_insert_columns_invalid_column_name(self): - df = pd.DataFrame({ - "column1": ["[invalid_column], Event, Action"], - "column2": ["Item"] - }) - with self.assertRaises(ValueError): - result = BaseInput._insert_columns(df) - - def test_insert_columns_four_columns(self): - df = pd.DataFrame({ - "column1": ["[column2], Event, [column3], Action"], - "column2": ["Item"], - "column3": ["Subject"], - "column4": ["Data"] - }) - expected_df = pd.DataFrame({ - "column1": ["Item, Event, Subject, Action"], - "column4": ["Data"] - }) - result = BaseInput._insert_columns(df) - pd.testing.assert_frame_equal(result, expected_df) - - # def test_insert_columns_invalid_syntax(self): - # df = pd.DataFrame({ - # "column1": ["column2], Event, Action"], - # "column2": ["Item"] - # }) - # with self.assertRaises(ValueError): - # result = BaseInput._insert_columns(df) - - # def test_insert_columns_no_self_reference(self): - # df = pd.DataFrame({ - # "column1": ["[column1], Event, Action"], - # "column2": ["Item"] - # }) - # with self.assertRaises(ValueError): - # result = BaseInput._insert_columns(df) - - -class TestCombineDataframe(unittest.TestCase): - def test_combine_dataframe_with_strings(self): - data = { - 'A': ['apple', 'banana', 'cherry'], - 'B': ['dog', 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', 'piano'] - } - df = pd.DataFrame(data) - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, dog, guitar', 'banana, elephant, harmonica', 'cherry, fox, piano']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_nan_values(self): - data = { - 'A': ['apple', np.nan, 'cherry'], - 'B': [np.nan, 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', np.nan] - } - df = pd.DataFrame(data) - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_empty_values(self): - data = { - 'A': ['apple', '', 'cherry'], - 'B': ['', 'elephant', 'fox'], - 'C': ['guitar', 'harmonica', ''] - } - df = pd.DataFrame(data) - result = BaseInput.combine_dataframe(df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox']) - self.assertTrue(result.equals(expected)) - - def test_combine_dataframe_with_mixed_values(self): - data = { - 'A': ['apple', np.nan, 'cherry', 'n/a', ''], - 'B': [np.nan, 'elephant', 'fox', 'n/a', ''], - 'C': ['guitar', 'harmonica', np.nan, 'n/a', ''] - } - df = pd.DataFrame(data) - csv_buffer = io.StringIO() - df.to_csv(csv_buffer, header=False, index=False) - csv_buffer.seek(0) - - # Use the same loading function we normally use to verify n/a translates right. - loaded_df = pd.read_csv(csv_buffer, header=None) - result = BaseInput.combine_dataframe(loaded_df) - expected = pd.Series(['apple, guitar', 'elephant, harmonica', 'cherry, fox', '', '']) - self.assertTrue(result.equals(expected)) - - -class TestColumnRefs(unittest.TestCase): - def test_simple_column_refs(self): - data1 = { - 'A': ['[col1], [col2]', 'tag1, tag2'], - 'B': ['tag3, tag4', '[col3]'], - } - df1 = pd.DataFrame(data1) - result1 = BaseInput._find_column_refs(df1) - expected1 = ['col1', 'col2', 'col3'] - self.assertEqual(result1, expected1) - - def test_mixed_cases_and_patterns(self): - data2 = { - 'A': ['[Col1], [col2]', 'tag1, [Col3]', 'tag3, [COL4]', '[col5], [col6]'], - } - df2 = pd.DataFrame(data2) - result2 = BaseInput._find_column_refs(df2) - expected2 = ['Col1', 'col2', 'Col3', 'COL4', 'col5', 'col6'] - self.assertEqual(result2, expected2) - - def test_no_column_references(self): - data3 = { - 'A': ['tag1, tag2', 'tag3, tag4'], - 'B': ['tag5, tag6', 'tag7, tag8'], - } - df3 = pd.DataFrame(data3) - result3 = BaseInput._find_column_refs(df3) - expected3 = [] - self.assertEqual(result3, expected3) - - def test_incomplete_square_brackets(self): - data4 = { - 'A': ['[col1, [col2]', 'tag1, [Col3'], - 'B': ['tag3, [COL4', '[col5, col6]'], - } - df4 = pd.DataFrame(data4) - result4 = BaseInput._find_column_refs(df4) - expected4 = ['col2'] - self.assertEqual(result4, expected4) \ No newline at end of file diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index e10e2a4a3..bc9c907b7 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -3,7 +3,7 @@ from hed import load_schema_version -from hed.models.df_util import shrink_defs, expand_defs, convert_to_form +from hed.models.df_util import shrink_defs, expand_defs from hed import DefinitionDict @@ -111,45 +111,4 @@ def test_expand_defs_series_placeholder(self): series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Action/TestDef1/123,Action/TestDef2)),Item/SomeItem"]) result = expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(result, expected_series) - - -class TestConvertToForm(unittest.TestCase): - def setUp(self): - self.schema = load_schema_version() - - def test_convert_to_form_short_tags(self): - df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) - expected_df = pd.DataFrame({"column1": ["Azure,See"]}) - result = convert_to_form(df, self.schema, "short_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) - - def test_convert_to_form_long_tags(self): - df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) - expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) - result = convert_to_form(df, self.schema, "long_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) - - def test_convert_to_form_series_short_tags(self): - series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) - expected_series = pd.Series(["Azure,See"]) - result = convert_to_form(series, self.schema, "short_tag") - pd.testing.assert_series_equal(result, expected_series) - - def test_convert_to_form_series_long_tags(self): - series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) - expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) - result = convert_to_form(series, self.schema, "long_tag") - pd.testing.assert_series_equal(result, expected_series) - - def test_convert_to_form_multiple_tags_short(self): - df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - expected_df = pd.DataFrame({"column1": ["Azure,Nose,4.5 m-per-s^2"]}) - result = convert_to_form(df, self.schema, "short_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) - - def test_convert_to_form_multiple_tags_long(self): - df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - result = convert_to_form(df, self.schema, "long_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) \ No newline at end of file + pd.testing.assert_series_equal(result, expected_series) \ No newline at end of file