From da7f4f62b736fe088287c6c1345ecb0160abe854 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 14 Jun 2023 17:54:13 -0500 Subject: [PATCH] Fix issue with df_util. Also make them not return the modified df/series --- hed/models/df_util.py | 12 ++---- tests/models/test_df_util.py | 80 ++++++++++++++++++------------------ tests/models/test_sidecar.py | 4 +- 3 files changed, 45 insertions(+), 51 deletions(-) diff --git a/hed/models/df_util.py b/hed/models/df_util.py index 2509a059a..ba378502a 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -59,14 +59,14 @@ def convert_to_form(df, hed_schema, tag_form, columns=None): """ Convert all tags in underlying dataframe to the specified form (in place). Parameters: - df (pd.Dataframe): The dataframe to modify + df (pd.Dataframe or pd.Series): The dataframe or series to modify hed_schema (HedSchema): The schema to use to convert tags. tag_form(str): HedTag property to convert tags to. columns (list): The columns to modify on the dataframe. """ if isinstance(df, pd.Series): - df = df.apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)) + df[:] = df.apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)) else: if columns is None: columns = df.columns @@ -74,8 +74,6 @@ def convert_to_form(df, hed_schema, tag_form, columns=None): for column in columns: df[column] = df[column].apply(partial(_convert_to_form, hed_schema=hed_schema, tag_form=tag_form)) - return df - def shrink_defs(df, hed_schema, columns=None): """ Shrink (in place) any def-expand tags found in the specified columns in the dataframe. @@ -97,8 +95,6 @@ def shrink_defs(df, hed_schema, columns=None): mask = df[column].str.contains('Def-expand/', case=False) df[column][mask] = df[column][mask].apply(partial(_shrink_defs, hed_schema=hed_schema)) - return df - def expand_defs(df, hed_schema, def_dict, columns=None): """ Expands any def tags found in the dataframe. @@ -120,9 +116,7 @@ def expand_defs(df, hed_schema, def_dict, columns=None): for column in columns: mask = df[column].str.contains('Def/', case=False) - df[column][mask] = df[column][mask].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)) - - return df + df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict)) def _convert_to_form(hed_string, hed_schema, tag_form): diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index c88446956..09f913466 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -14,54 +14,54 @@ def setUp(self): def test_shrink_defs_normal(self): df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]}) expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]}) - result = shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_shrink_defs_placeholder(self): df = pd.DataFrame({"column1": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) expected_df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) - result = shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_shrink_defs_no_matching_tags(self): df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]}) expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent, Item/SomeItem,Acceleration/25)"]}) - result = shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_shrink_defs_multiple_columns(self): df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"], "column2": ["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"], "column2": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) - result = shrink_defs(df, self.schema, ['column1', 'column2']) - pd.testing.assert_frame_equal(result, expected_df) + shrink_defs(df, self.schema, ['column1', 'column2']) + pd.testing.assert_frame_equal(df, expected_df) def test_shrink_defs_multiple_defs_same_line(self): df = pd.DataFrame({"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Acceleration/30"]}) expected_df = pd.DataFrame({"column1": ["Def/TestDefNormal,Def/TestDefPlaceholder/123,Acceleration/30"]}) - result = shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_shrink_defs_mixed_tags(self): df = pd.DataFrame({"column1": [ "(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent,(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem,Acceleration/25"]}) expected_df = pd.DataFrame( {"column1": ["Def/TestDefNormal,Event/SomeEvent,Def/TestDefPlaceholder/123,Item/SomeItem,Acceleration/25"]}) - result = shrink_defs(df, self.schema, ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + shrink_defs(df, self.schema, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_shrink_defs_series_normal(self): series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]) expected_series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"]) - result = shrink_defs(series, self.schema, None) - pd.testing.assert_series_equal(result, expected_series) + shrink_defs(series, self.schema, None) + pd.testing.assert_series_equal(series, expected_series) def test_shrink_defs_series_placeholder(self): series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]) expected_series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) - result = shrink_defs(series, self.schema, None) - pd.testing.assert_series_equal(result, expected_series) + shrink_defs(series, self.schema, None) + pd.testing.assert_series_equal(series, expected_series) class TestExpandDefs(unittest.TestCase): @@ -75,21 +75,21 @@ def test_expand_defs_normal(self): df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"]}) expected_df = pd.DataFrame( {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]}) - result = expand_defs(df, self.schema, self.def_dict, ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_expand_defs_placeholder(self): df = pd.DataFrame({"column1": ["Def/TestDefPlaceholder/123,Item/SomeItem"]}) expected_df = pd.DataFrame({"column1": [ "(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) - result = expand_defs(df, self.schema, self.def_dict, ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_expand_defs_no_matching_tags(self): df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]}) expected_df = pd.DataFrame({"column1": ["(Event/SomeEvent,Item/SomeItem,Acceleration/25)"]}) - result = expand_defs(df, self.schema, self.def_dict, ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + expand_defs(df, self.schema, self.def_dict, ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_expand_defs_multiple_columns(self): df = pd.DataFrame({"column1": ["Def/TestDefNormal,Event/SomeEvent"], @@ -98,20 +98,20 @@ def test_expand_defs_multiple_columns(self): {"column1": ["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"], "column2": [ "(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]}) - result = expand_defs(df, self.schema, self.def_dict, ['column1', 'column2']) - pd.testing.assert_frame_equal(result, expected_df) + expand_defs(df, self.schema, self.def_dict, ['column1', 'column2']) + pd.testing.assert_frame_equal(df, expected_df) def test_expand_defs_series_normal(self): series = pd.Series(["Def/TestDefNormal,Event/SomeEvent"]) expected_series = pd.Series(["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]) - result = expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(result, expected_series) + expand_defs(series, self.schema, self.def_dict, None) + pd.testing.assert_series_equal(series, expected_series) def test_expand_defs_series_placeholder(self): series = pd.Series(["Def/TestDefPlaceholder/123,Item/SomeItem"]) expected_series = pd.Series(["(Def-expand/TestDefPlaceholder/123,(Acceleration/123,Action/TestDef2)),Item/SomeItem"]) - result = expand_defs(series, self.schema, self.def_dict, None) - pd.testing.assert_series_equal(result, expected_series) + expand_defs(series, self.schema, self.def_dict, None) + pd.testing.assert_series_equal(series, expected_series) class TestConvertToForm(unittest.TestCase): @@ -121,38 +121,38 @@ def setUp(self): def test_convert_to_form_short_tags(self): df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) expected_df = pd.DataFrame({"column1": ["Azure,See"]}) - result = convert_to_form(df, self.schema, "short_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_convert_to_form_long_tags(self): df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Action/Perceive/See"]}) expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) - result = convert_to_form(df, self.schema, "long_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_convert_to_form_series_short_tags(self): series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) expected_series = pd.Series(["Azure,See"]) - result = convert_to_form(series, self.schema, "short_tag") - pd.testing.assert_series_equal(result, expected_series) + convert_to_form(series, self.schema, "short_tag") + pd.testing.assert_series_equal(series, expected_series) def test_convert_to_form_series_long_tags(self): series = pd.Series(["CSS-color/White-color/Azure,Action/Perceive/See"]) expected_series = pd.Series(["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]) - result = convert_to_form(series, self.schema, "long_tag") - pd.testing.assert_series_equal(result, expected_series) + convert_to_form(series, self.schema, "long_tag") + pd.testing.assert_series_equal(series, expected_series) def test_convert_to_form_multiple_tags_short(self): df = pd.DataFrame({"column1": ["Visual-attribute/Color/CSS-color/White-color/Azure,Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) expected_df = pd.DataFrame({"column1": ["Azure,Nose,Acceleration/4.5 m-per-s^2"]}) - result = convert_to_form(df, self.schema, "short_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + convert_to_form(df, self.schema, "short_tag", ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_convert_to_form_multiple_tags_long(self): df = pd.DataFrame({"column1": ["CSS-color/White-color/Azure,Anatomical-item/Body-part/Head/Face/Nose,Rate-of-change/Acceleration/4.5 m-per-s^2"]}) expected_df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Item/Biological-item/Anatomical-item/Body-part/Head/Face/Nose,Property/Data-property/Data-value/Spatiotemporal-value/Rate-of-change/Acceleration/4.5 m-per-s^2"]}) - result = convert_to_form(df, self.schema, "long_tag", ['column1']) - pd.testing.assert_frame_equal(result, expected_df) + convert_to_form(df, self.schema, "long_tag", ['column1']) + pd.testing.assert_frame_equal(df, expected_df) def test_basic_expand_detection(self): # all simple cases with no duplicates diff --git a/tests/models/test_sidecar.py b/tests/models/test_sidecar.py index 4fdacb31f..8383de6f8 100644 --- a/tests/models/test_sidecar.py +++ b/tests/models/test_sidecar.py @@ -142,7 +142,7 @@ def test_set_hed_strings(self): for column_data in sidecar: hed_strings = column_data.get_hed_strings() - hed_strings = df_util.convert_to_form(hed_strings, self.hed_schema, "long_tag") + df_util.convert_to_form(hed_strings, self.hed_schema, "long_tag") column_data.set_hed_strings(hed_strings) sidecar_long = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/long_tag_test.json")) self.assertEqual(sidecar.loaded_dict, sidecar_long.loaded_dict) @@ -151,7 +151,7 @@ def test_set_hed_strings(self): for column_data in sidecar: hed_strings = column_data.get_hed_strings() - hed_strings = df_util.convert_to_form(hed_strings, self.hed_schema, "short_tag") + df_util.convert_to_form(hed_strings, self.hed_schema, "short_tag") column_data.set_hed_strings(hed_strings) sidecar_short = Sidecar(os.path.join(self.base_data_dir, "sidecar_tests/short_tag_test.json")) self.assertEqual(sidecar.loaded_dict, sidecar_short.loaded_dict)