From 21590f20c51f629624de65a50ecfd1a08c24f47f Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 16 Mar 2023 16:32:12 -0500 Subject: [PATCH] Updated unit tests --- hed/models/df_util.py | 2 +- hed/tools/analysis/analysis_util.py | 16 +++-- hed/tools/analysis/hed_context_manager.py | 2 +- hed/tools/analysis/hed_type_definitions.py | 10 +-- .../operations/convert_columns_op.py | 70 +++++++++++++++++++ .../remodeling/operations/valid_operations.py | 2 + .../test_analysis_util_assemble_hed.py | 13 ++-- .../analysis/test_hed_context_manager.py | 16 +++-- tests/tools/analysis/test_hed_tag_counts.py | 2 +- .../operations/test_convert_columns_op.py | 50 +++++++++++++ 10 files changed, 159 insertions(+), 24 deletions(-) create mode 100644 hed/tools/remodeling/operations/convert_columns_op.py create mode 100644 tests/tools/remodeling/operations/test_convert_columns_op.py diff --git a/hed/models/df_util.py b/hed/models/df_util.py index b7e73a282..d877028aa 100644 --- a/hed/models/df_util.py +++ b/hed/models/df_util.py @@ -14,7 +14,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_ The path to the tabular file, or a TabularInput object representing it. sidecar: str or Sidecar The path to the sidecar file, or a Sidecar object representing it. - hed_schema: str or HedSchema + hed_schema: HedSchema If str, will attempt to load as a version if it doesn't have a valid extension. extra_def_dicts: list of DefinitionDict, optional Any extra DefinitionDict objects to use when parsing the HED tags. 
diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py index c93debd0d..27f442c3d 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -6,13 +6,16 @@ from hed.tools.util.data_util import separate_values from hed.models.hed_tag import HedTag from hed.models.hed_group import HedGroup +from hed.models.df_util import get_assembled, expand_defs -def assemble_hed(data_input, columns_included=None, expand_defs=False): +def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False): """ Return assembled HED annotations in a dataframe. Parameters: data_input (TabularInput): The tabular input file whose HED annotations are to be assembled. + sidecar (Sidecar): Sidecar with definitions. + schema (HedSchema): Hed schema columns_included (list or None): A list of additional column names to include. If None, only the list of assembled tags is included. expand_defs (bool): If True, definitions are expanded when the events are assembled. 
@@ -23,14 +26,19 @@ def assemble_hed(data_input, columns_included=None, expand_defs=False): """ eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included) - hed_obj_list = get_assembled_strings(data_input, expand_defs=expand_defs) - hed_string_list = [str(hed) for hed in hed_obj_list] + hed_string_list = data_input.series_a + definitions = sidecar.get_def_dict(hed_schema=schema) + if expand_defs: + expand_defs(hed_string_list, schema, definitions, columns=None) + # hed_obj_list, defs = get_assembled(data_input, sidecar, schema, extra_def_dicts=None, join_columns=True, + # shrink_defs=False, expand_defs=True) + # hed_string_list = [str(hed) for hed in hed_obj_list] if not eligible_columns: df = pd.DataFrame({"HED_assembled": hed_string_list}) else: df = data_input.dataframe[eligible_columns].copy(deep=True) df['HED_assembled'] = hed_string_list - definitions = data_input.get_definitions().gathered_defs + # definitions = data_input.get_definitions().gathered_defs return df, definitions diff --git a/hed/tools/analysis/hed_context_manager.py b/hed/tools/analysis/hed_context_manager.py index 011330662..06a02dc82 100644 --- a/hed/tools/analysis/hed_context_manager.py +++ b/hed/tools/analysis/hed_context_manager.py @@ -35,7 +35,7 @@ def __init__(self, hed_strings, hed_schema): """ - self.hed_strings = [HedString(str(hed), hed_schema=hed_schema) for hed in hed_strings] + self.hed_strings = hed_strings if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup): raise ValueError("ContextRequiresSchema", f"Context manager must have a valid HedSchema of HedSchemaGroup") self.hed_schema = hed_schema diff --git a/hed/tools/analysis/hed_type_definitions.py b/hed/tools/analysis/hed_type_definitions.py index 644802627..8d49dc060 100644 --- a/hed/tools/analysis/hed_type_definitions.py +++ b/hed/tools/analysis/hed_type_definitions.py @@ -1,7 +1,7 @@ """ Manages definitions associated with a type such as 
condition-variable. """ from hed.models.hed_tag import HedTag -from hed.models.def_mapper import DefMapper +from hed.models.definition_dict import DefinitionDict class HedTypeDefinitions: @@ -10,16 +10,18 @@ def __init__(self, definitions, hed_schema, type_tag='condition-variable'): """ Create a definition manager for a type of variable. Parameters: - definitions (dict or DefMapper): A dictionary of DefinitionEntry objects. + definitions (dict or DefinitionDict): A dictionary of DefinitionEntry objects. hed_schema (Hedschema or HedSchemaGroup): The schema used for parsing. type_tag (str): Lower-case HED tag string representing the type managed. + # TODO: [Refactor] - should dict be allowed for definitions. + """ self.type_tag = type_tag.lower() self.hed_schema = hed_schema - if isinstance(definitions, DefMapper): - self.definitions = definitions.gathered_defs + if isinstance(definitions, DefinitionDict): + self.definitions = definitions.defs elif isinstance(definitions, dict): self.definitions = definitions else: diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py new file mode 100644 index 000000000..ae383a1e4 --- /dev/null +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -0,0 +1,70 @@ +""" Convert the type of the specified columns of a tabular file. """ + +from hed.tools.remodeling.operations.base_op import BaseOp + + +class ConvertColumnsOp(BaseOp): + """ Convert the specified columns to a specified type. + + Required remodeling parameters: + - **column_names** (*list*): The list of columns to convert. + - **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.) + - **decimal_places** (*int*): Number of decimal places to keep (optional; for fixed only). 
+ + + """ + + PARAMS = { + "operation": "convert_columns", + "required_parameters": { + "column_names": list, + "convert_to": str + }, + "optional_parameters": { + "decimal_places": int + } + } + + def __init__(self, parameters): + """ Constructor for the convert columns operation. + + Parameters: + parameters (dict): Parameter values for required and optional parameters. + + Raises: + KeyError + - If a required parameter is missing. + - If an unexpected parameter is provided. + + TypeError + - If a parameter has the wrong type. + + ValueError + - If convert_to is not one of the allowed values. + + """ + super().__init__(self.PARAMS, parameters) + self.column_names = parameters['column_names'] + self.convert_to = parameters['convert_to'] + self.decimal_places = parameters.get('decimal_places', None) + self.allowed_types = ['str', 'int', 'float', 'fixed'] + if self.convert_to not in self.allowed_types: + raise ValueError("CannotConvertToSpecifiedType", + f"The convert_to value {self.convert_to} must be one of {str(self.allowed_types)}") + + def do_op(self, dispatcher, df, name, sidecar=None): + """ Convert the specified columns to a specified type. + + Parameters: + dispatcher (Dispatcher): Manages the operation I/O. + df (DataFrame): The DataFrame to be remodeled. + name (str): Unique identifier for the dataframe -- often the original file path. + sidecar (Sidecar or file-like): Only needed for HED operations. + + Returns: + DataFrame: A copy of df (the column conversion is not yet implemented). + + """ + + df_new = df.copy() + return df_new diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py index 36761591a..d00391270 100644 --- a/hed/tools/remodeling/operations/valid_operations.py +++ b/hed/tools/remodeling/operations/valid_operations.py @@ -1,5 +1,6 @@ """ The valid operations for the remodeling tools. 
""" +# from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp @@ -20,6 +21,7 @@ from hed.tools.remodeling.operations.summarize_hed_validation_op import SummarizeHedValidationOp valid_operations = { + # 'convert_columns': ConvertColumnsOp, 'factor_column': FactorColumnOp, 'factor_hed_tags': FactorHedTagsOp, 'factor_hed_type': FactorHedTypeOp, diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py index 058213e3e..9c37b8620 100644 --- a/tests/tools/analysis/test_analysis_util_assemble_hed.py +++ b/tests/tools/analysis/test_analysis_util_assemble_hed.py @@ -22,13 +22,14 @@ def setUpClass(cls): hed_schema = hedschema.load_schema(schema_path) cls.hed_schema = hed_schema - sidecar1 = Sidecar(json_path, name='face_sub1_json', hed_schema=hed_schema) + sidecar1 = Sidecar(json_path, name='face_sub1_json') cls.sidecar_path = sidecar1 - cls.input_data = TabularInput(events_path, hed_schema=hed_schema, sidecar=sidecar1, name="face_sub1_events") + cls.sidecar1 = sidecar1 + cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar") def test_assemble_hed_included_no_expand(self): - df1, dict1 = assemble_hed(self.input_data, + df1, dict1 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema, columns_included=["onset", "duration", "event_type"], expand_defs=False) self.assertIsInstance(df1, DataFrame, "hed_assemble should return a dataframe when columns are included") columns1 = list(df1.columns) @@ -38,11 +39,11 @@ def test_assemble_hed_included_no_expand(self): self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no 
def expand has Def tags") self.assertEqual(first_str1.find('Def-expand'), -1, "assemble_hed with no def expand does not have Def-expand tags") - self.assertIsInstance(dict1, dict, "hed_assemble returns a dictionary of definitions") - self.assertEqual(len(dict1), 17, "hed_assemble definition dictionary has the right number of elements.") + self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of definitions") + self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.") def test_assemble_hed_included_expand(self): - df2, dict2 = assemble_hed(self.input_data, + df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema, columns_included=["onset", "duration", "event_type"], expand_defs=True) first_str2 = df2.iloc[0]['HED_assembled'] self.assertEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag") diff --git a/tests/tools/analysis/test_hed_context_manager.py b/tests/tools/analysis/test_hed_context_manager.py index 9ad70e958..26e0f4e87 100644 --- a/tests/tools/analysis/test_hed_context_manager.py +++ b/tests/tools/analysis/test_hed_context_manager.py @@ -1,13 +1,12 @@ import os import unittest from hed.errors.exceptions import HedFileError -from hed.models.hed_group import HedGroup from hed.models.hed_string import HedString from hed.models.sidecar import Sidecar from hed.models.tabular_input import TabularInput from hed.schema.hed_schema_io import load_schema_version -from hed.tools.analysis.hed_context_manager import HedContextManager, OnsetGroup -from hed.tools.analysis.analysis_util import get_assembled_strings +from hed.tools.analysis.hed_context_manager import HedContextManager +from hed.models.df_util import get_assembled class Test(unittest.TestCase): @@ -37,7 +36,8 @@ def setUpClass(cls): 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) 
sidecar1 = Sidecar(sidecar_path, name='face_sub1_json') - cls.input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events") + cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") + cls.sidecar1 = sidecar1 cls.schema = schema # def test_onset_group(self): @@ -71,13 +71,14 @@ def test_constructor(self): self.assertIsInstance(context, list, "The constructor event contexts should be a list") self.assertIsInstance(context[1], HedString, "The constructor event contexts has a correct element") - def test_constructor(self): + def test_constructor1(self): with self.assertRaises(ValueError) as cont: HedContextManager(self.test_strings1, None) self.assertEqual(cont.exception.args[0], "ContextRequiresSchema") def test_iter(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) + hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) manager1 = HedContextManager(hed_strings, self.schema) i = 0 for hed, context in manager1.iter_context(): @@ -86,7 +87,8 @@ def test_iter(self): i = i + 1 def test_constructor_from_assembled(self): - hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False) + hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None, + join_columns=True, shrink_defs=True, expand_defs=False) manager1 = HedContextManager(hed_strings, self.schema) self.assertEqual(len(manager1.hed_strings), 200, "The constructor for assembled strings has expected # of strings") diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py index ece27f496..76b0a9eaf 100644 --- a/tests/tools/analysis/test_hed_tag_counts.py +++ b/tests/tools/analysis/test_hed_tag_counts.py @@ -24,7 +24,7 @@ def setUpClass(cls): schema = 
hedschema.load_schema(schema_path) cls.hed_schema = schema sidecar1 = Sidecar(json_path, name='face_sub1_json') - input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events") + input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events") input_df, def_dict = assemble_hed(input_data, expand_defs=False) cls.input_df = input_df cls.def_dict = def_dict diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py new file mode 100644 index 000000000..01a27f949 --- /dev/null +++ b/tests/tools/remodeling/operations/test_convert_columns_op.py @@ -0,0 +1,50 @@ +import pandas as pd +import numpy as np +import unittest +from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp +from hed.tools.remodeling.dispatcher import Dispatcher + + +class Test(unittest.TestCase): + """ + + TODO: Test when no factor names and values are given. + + """ + @classmethod + def setUpClass(cls): + cls.sample_data = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female'], + [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female'], + [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female'], + [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female'], + [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male'], + [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male']] + cls.factored = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female', 0, 0], + [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female', 0, 1], + [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female', 0, 0], + [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female', 1, 0], + [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male', 0, 1], + [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male', 0, 0]] + 
cls.sample_columns = ['onset', 'duration', 'trial_type', 'stop_signal_delay', 'response_time', + 'response_accuracy', 'response_hand', 'sex'] + cls.default_factor_columns = ["trial_type.succesful_stop", "trial_type.unsuccesful_stop"] + + def setUp(self): + self.base_parameters = { + "column_names": ["onset", "duration", "response_time"], + "convert_to": "int" + } + + @classmethod + def tearDownClass(cls): + pass + + def test_constructor_bad_convert_to(self): + self.base_parameters["convert_to"] = "blech" + with self.assertRaises(ValueError) as context: + ConvertColumnsOp(self.base_parameters) + self.assertEqual(context.exception.args[0], "CannotConvertToSpecifiedType") + + +if __name__ == '__main__': + unittest.main()