diff --git a/hed/tools/analysis/hed_context_manager.py b/hed/tools/analysis/hed_context_manager.py index ebf053d2f..f3a5b8758 100644 --- a/hed/tools/analysis/hed_context_manager.py +++ b/hed/tools/analysis/hed_context_manager.py @@ -5,7 +5,8 @@ from hed.schema import HedSchema, HedSchemaGroup from hed.tools.analysis.analysis_util import hed_to_str -#TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe replace by event manager. +# TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe replace by event manager. +# TODO: Implement insets class OnsetGroup: def __init__(self, name, contents, start_index, end_index=None): diff --git a/hed/tools/analysis/hed_type_factors.py b/hed/tools/analysis/hed_type_factors.py index bf6afff2f..b4cc92af4 100644 --- a/hed/tools/analysis/hed_type_factors.py +++ b/hed/tools/analysis/hed_type_factors.py @@ -39,18 +39,18 @@ def get_factors(self, factor_encoding="one-hot"): DataFrame: DataFrame containing the factor vectors as the columns. """ - df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.type_value]) - df.loc[list(self.direct_indices.keys()), [self.type_value]] = 1 + if not self.levels: + df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.type_value]) + df.loc[list(self.direct_indices.keys()), [self.type_value]] = 1 return df levels = list(self.levels.keys()) levels_list = [f"{self.type_value}.{level}" for level in levels] - df_levels = pd.DataFrame(0, index=range(self.number_elements), columns=levels_list) + factors = pd.DataFrame(0, index=range(self.number_elements), columns=levels_list) for index, level in enumerate(levels): index_keys = list(self.levels[level].keys()) - df_levels.loc[index_keys, [levels_list[index]]] = 1 - factors = pd.concat([df, df_levels], axis=1) + factors.loc[index_keys, [levels_list[index]]] = 1 if factor_encoding == "one-hot": return factors sum_factors = factors.sum(axis=1) diff --git a/hed/tools/analysis/hed_type_manager.py b/hed/tools/analysis/hed_type_manager.py index 43ff826e8..3abe427ff 100644 --- a/hed/tools/analysis/hed_type_manager.py +++ b/hed/tools/analysis/hed_type_manager.py @@ -44,19 +44,21 @@ def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-ho factor_encoding (str): Specifies type of factor encoding (one-hot or categorical). Returns: - DataFrame: DataFrame containing the factor vectors as the columns. + DataFrame or None: DataFrame containing the factor vectors as the columns. """ - this_var = self.get_type_variable(type_tag) + this_var = self.get_type_variable(type_tag.lower()) if this_var is None: return None variables = this_var.get_type_value_names() - if variables is None: - variables = type_values - df_list = [0]*len(variables) - for index, variable in enumerate(variables): + if not type_values: + type_values = variables + df_list = [0]*len(type_values) + for index, variable in enumerate(type_values): var_sum = this_var._type_value_map[variable] df_list[index] = var_sum.get_factors(factor_encoding=factor_encoding) + if not df_list: + return None return pd.concat(df_list, axis=1) def get_type_variable(self, type_tag): diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 1d5674d7a..21057b798 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -78,7 +78,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): var_manager = HedTypeManager(hed_strings, dispatcher.hed_schema, definitions) var_manager.add_type_variable(self.type_tag.lower()) - df_factors = var_manager.get_factor_vectors(self.type_tag, [], factor_encoding="one-hot") + df_factors = var_manager.get_factor_vectors(self.type_tag, self.type_values, factor_encoding="one-hot") if len(df_factors.columns) > 0: df_list.append(df_factors) df_new = pd.concat(df_list, axis=1) diff --git a/tests/tools/analysis/test_hed_type_factors.py b/tests/tools/analysis/test_hed_type_factors.py index 5821e2675..378617a12 100644 --- a/tests/tools/analysis/test_hed_type_factors.py +++ b/tests/tools/analysis/test_hed_type_factors.py @@ -139,8 +139,11 @@ def test_get_variable_factors(self): self.assertIsInstance(factors, pd.DataFrame, "get_factors contains dataframe.") self.assertEqual(len(factors), var_sum.number_elements, "get_factors has factors of same length as number of elements") - self.assertEqual(len(factors.columns), summary["levels"] + 1, - 'get_factors has factors levels + 1 (direct references)') + if not var_manager._type_value_map[variable].levels: + self.assertEqual(len(factors.columns), 1) + else: + self.assertEqual(len(factors.columns), summary["levels"], 'get_factors has factors levels') + self.assertEqual(len(factors.columns), len(var_manager._type_value_map[variable].levels)) def test_count_events(self): list1 = [0, 2, 6, 1, 2, 0, 0] diff --git a/tests/tools/analysis/test_hed_type_manager.py b/tests/tools/analysis/test_hed_type_manager.py index 9fd7abce2..657e9fec5 100644 --- a/tests/tools/analysis/test_hed_type_manager.py +++ b/tests/tools/analysis/test_hed_type_manager.py @@ -60,8 +60,8 @@ def test_get_factor_vectors(self): df_task = var_manager.get_factor_vectors("task") self.assertEqual(len(df_cond), base_length, "get_factor_vectors returns df same length as original") self.assertEqual(len(df_task), base_length, "get_factor_vectors returns df same length as original if 2 types") - self.assertEqual(len(df_cond.columns), 10, "get_factor_vectors has right number of factors") - self.assertEqual(len(df_task.columns), 4, "get_factor_vectors has right number of factors if 2 types") + self.assertEqual(len(df_cond.columns), 7, "get_factor_vectors has right number of factors") + self.assertEqual(len(df_task.columns), 2, "get_factor_vectors has right number of factors if 2 types") df_baloney = var_manager.get_factor_vectors("baloney") self.assertIsNone(df_baloney, "get_factor_vectors returns None if no factors") diff --git a/tests/tools/analysis/test_hed_type_values.py b/tests/tools/analysis/test_hed_type_values.py index 4b3125353..d8428e23c 100644 --- a/tests/tools/analysis/test_hed_type_values.py +++ b/tests/tools/analysis/test_hed_type_values.py @@ -116,10 +116,10 @@ def test_get_variable_factors(self): df_new1 = var_manager.get_type_factors() self.assertIsInstance(df_new1, DataFrame) self.assertEqual(len(df_new1), 200) - self.assertEqual(len(df_new1.columns), 10) + self.assertEqual(len(df_new1.columns), 7) df_new2 = var_manager.get_type_factors(type_values=["face-type"]) self.assertEqual(len(df_new2), 200) - self.assertEqual(len(df_new2.columns), 4) + self.assertEqual(len(df_new2.columns), 3) df_new3 = var_manager.get_type_factors(type_values=["junk"]) self.assertIsNone(df_new3) diff --git a/tests/tools/remodeling/operations/test_factor_hed_type_op.py b/tests/tools/remodeling/operations/test_factor_hed_type_op.py index 22f39617a..e43e0e803 100644 --- a/tests/tools/remodeling/operations/test_factor_hed_type_op.py +++ b/tests/tools/remodeling/operations/test_factor_hed_type_op.py @@ -33,11 +33,9 @@ def test_valid(self): op = FactorHedTypeOp(self.base_parameters) df_new = op.do_op(self.dispatch, self.data_path, 'subj2_run1', sidecar=self.json_path) self.assertEqual(len(df_new), 200, "factor_hed_type_op length is correct") - self.assertEqual(len(df_new.columns), 20, "factor_hed_type_op has correct number of columns") + self.assertEqual(len(df_new.columns), 17, "factor_hed_type_op has correct number of columns") def test_valid_specific_column(self): - # Not implemented yet - # Test correct when all valid and no unwanted information parms = self.base_parameters parms["type_values"] = ["key-assignment"] op = FactorHedTypeOp(parms) @@ -46,7 +44,7 @@ def test_valid_specific_column(self): df_new = op.do_op(dispatch, dispatch.prep_data(df_new), 'run-01', sidecar=self.json_path) df_new = dispatch.post_proc_data(df_new) self.assertEqual(len(df_new), 200, "factor_hed_type_op length is correct when type_values specified") - self.assertEqual(len(df_new.columns), 20, + self.assertEqual(len(df_new.columns), 11, "factor_hed_type_op has correct number of columns when type_values specified") diff --git a/tests/tools/remodeling/test_dispatcher.py b/tests/tools/remodeling/test_dispatcher.py index e2ff311a9..177a4fe43 100644 --- a/tests/tools/remodeling/test_dispatcher.py +++ b/tests/tools/remodeling/test_dispatcher.py @@ -182,8 +182,8 @@ def test_run_operations_hed(self): df = dispatch.run_operations(events_path, sidecar=sidecar_path, verbose=False) self.assertIsInstance(df, pd.DataFrame) self.assertEqual(len(df), 200) - self.assertEqual(len(df.columns), 20) - self.assertIn('key-assignment', df.columns) + self.assertEqual(len(df.columns), 17) + self.assertIn('key-assignment.right-sym-cond', df.columns) def test_save_summaries(self): with open(self.summarize_model) as fp: