Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion hed/tools/analysis/hed_context_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from hed.schema import HedSchema, HedSchemaGroup
from hed.tools.analysis.analysis_util import hed_to_str

#TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe replace by event manager.
# TODO: [Refactor] clean up distinction between hed as strings versus objects -- maybe replace by event manager.
# TODO: Implement insets

class OnsetGroup:
def __init__(self, name, contents, start_index, end_index=None):
Expand Down
10 changes: 5 additions & 5 deletions hed/tools/analysis/hed_type_factors.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,18 @@ def get_factors(self, factor_encoding="one-hot"):
DataFrame: DataFrame containing the factor vectors as the columns.

"""
df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.type_value])
df.loc[list(self.direct_indices.keys()), [self.type_value]] = 1

if not self.levels:
df = pd.DataFrame(0, index=range(self.number_elements), columns=[self.type_value])
df.loc[list(self.direct_indices.keys()), [self.type_value]] = 1
return df

levels = list(self.levels.keys())
levels_list = [f"{self.type_value}.{level}" for level in levels]
df_levels = pd.DataFrame(0, index=range(self.number_elements), columns=levels_list)
factors = pd.DataFrame(0, index=range(self.number_elements), columns=levels_list)
for index, level in enumerate(levels):
index_keys = list(self.levels[level].keys())
df_levels.loc[index_keys, [levels_list[index]]] = 1
factors = pd.concat([df, df_levels], axis=1)
factors.loc[index_keys, [levels_list[index]]] = 1
if factor_encoding == "one-hot":
return factors
sum_factors = factors.sum(axis=1)
Expand Down
14 changes: 8 additions & 6 deletions hed/tools/analysis/hed_type_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,21 @@ def get_factor_vectors(self, type_tag, type_values=None, factor_encoding="one-ho
factor_encoding (str): Specifies type of factor encoding (one-hot or categorical).

Returns:
DataFrame: DataFrame containing the factor vectors as the columns.
DataFrame or None: DataFrame containing the factor vectors as the columns, or None if no type variable is found for type_tag or there are no factors to concatenate.

"""
this_var = self.get_type_variable(type_tag)
this_var = self.get_type_variable(type_tag.lower())
if this_var is None:
return None
variables = this_var.get_type_value_names()
if variables is None:
variables = type_values
df_list = [0]*len(variables)
for index, variable in enumerate(variables):
if not type_values:
type_values = variables
df_list = [0]*len(type_values)
for index, variable in enumerate(type_values):
var_sum = this_var._type_value_map[variable]
df_list[index] = var_sum.get_factors(factor_encoding=factor_encoding)
if not df_list:
return None
return pd.concat(df_list, axis=1)

def get_type_variable(self, type_tag):
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/remodeling/operations/factor_hed_type_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
var_manager = HedTypeManager(hed_strings, dispatcher.hed_schema, definitions)
var_manager.add_type_variable(self.type_tag.lower())

df_factors = var_manager.get_factor_vectors(self.type_tag, [], factor_encoding="one-hot")
df_factors = var_manager.get_factor_vectors(self.type_tag, self.type_values, factor_encoding="one-hot")
if len(df_factors.columns) > 0:
df_list.append(df_factors)
df_new = pd.concat(df_list, axis=1)
Expand Down
7 changes: 5 additions & 2 deletions tests/tools/analysis/test_hed_type_factors.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,11 @@ def test_get_variable_factors(self):
self.assertIsInstance(factors, pd.DataFrame, "get_factors contains dataframe.")
self.assertEqual(len(factors), var_sum.number_elements,
"get_factors has factors of same length as number of elements")
self.assertEqual(len(factors.columns), summary["levels"] + 1,
'get_factors has factors levels + 1 (direct references)')
if not var_manager._type_value_map[variable].levels:
self.assertEqual(len(factors.columns), 1)
else:
self.assertEqual(len(factors.columns), summary["levels"], 'get_factors has factors levels')
self.assertEqual(len(factors.columns), len(var_manager._type_value_map[variable].levels))

def test_count_events(self):
list1 = [0, 2, 6, 1, 2, 0, 0]
Expand Down
4 changes: 2 additions & 2 deletions tests/tools/analysis/test_hed_type_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def test_get_factor_vectors(self):
df_task = var_manager.get_factor_vectors("task")
self.assertEqual(len(df_cond), base_length, "get_factor_vectors returns df same length as original")
self.assertEqual(len(df_task), base_length, "get_factor_vectors returns df same length as original if 2 types")
self.assertEqual(len(df_cond.columns), 10, "get_factor_vectors has right number of factors")
self.assertEqual(len(df_task.columns), 4, "get_factor_vectors has right number of factors if 2 types")
self.assertEqual(len(df_cond.columns), 7, "get_factor_vectors has right number of factors")
self.assertEqual(len(df_task.columns), 2, "get_factor_vectors has right number of factors if 2 types")
df_baloney = var_manager.get_factor_vectors("baloney")
self.assertIsNone(df_baloney, "get_factor_vectors returns None if no factors")

Expand Down
4 changes: 2 additions & 2 deletions tests/tools/analysis/test_hed_type_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ def test_get_variable_factors(self):
df_new1 = var_manager.get_type_factors()
self.assertIsInstance(df_new1, DataFrame)
self.assertEqual(len(df_new1), 200)
self.assertEqual(len(df_new1.columns), 10)
self.assertEqual(len(df_new1.columns), 7)
df_new2 = var_manager.get_type_factors(type_values=["face-type"])
self.assertEqual(len(df_new2), 200)
self.assertEqual(len(df_new2.columns), 4)
self.assertEqual(len(df_new2.columns), 3)
df_new3 = var_manager.get_type_factors(type_values=["junk"])
self.assertIsNone(df_new3)

Expand Down
6 changes: 2 additions & 4 deletions tests/tools/remodeling/operations/test_factor_hed_type_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,9 @@ def test_valid(self):
op = FactorHedTypeOp(self.base_parameters)
df_new = op.do_op(self.dispatch, self.data_path, 'subj2_run1', sidecar=self.json_path)
self.assertEqual(len(df_new), 200, "factor_hed_type_op length is correct")
self.assertEqual(len(df_new.columns), 20, "factor_hed_type_op has correct number of columns")
self.assertEqual(len(df_new.columns), 17, "factor_hed_type_op has correct number of columns")

def test_valid_specific_column(self):
# Not implemented yet
# Test correct when all valid and no unwanted information
parms = self.base_parameters
parms["type_values"] = ["key-assignment"]
op = FactorHedTypeOp(parms)
Expand All @@ -46,7 +44,7 @@ def test_valid_specific_column(self):
df_new = op.do_op(dispatch, dispatch.prep_data(df_new), 'run-01', sidecar=self.json_path)
df_new = dispatch.post_proc_data(df_new)
self.assertEqual(len(df_new), 200, "factor_hed_type_op length is correct when type_values specified")
self.assertEqual(len(df_new.columns), 20,
self.assertEqual(len(df_new.columns), 11,
"factor_hed_type_op has correct number of columns when type_values specified")


Expand Down
4 changes: 2 additions & 2 deletions tests/tools/remodeling/test_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ def test_run_operations_hed(self):
df = dispatch.run_operations(events_path, sidecar=sidecar_path, verbose=False)
self.assertIsInstance(df, pd.DataFrame)
self.assertEqual(len(df), 200)
self.assertEqual(len(df.columns), 20)
self.assertIn('key-assignment', df.columns)
self.assertEqual(len(df.columns), 17)
self.assertIn('key-assignment.right-sym-cond', df.columns)

def test_save_summaries(self):
with open(self.summarize_model) as fp:
Expand Down