Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def get_assembled(tabular_file, sidecar, hed_schema, extra_def_dicts=None, join_
The path to the tabular file, or a TabularInput object representing it.
sidecar: str or Sidecar
The path to the sidecar file, or a Sidecar object representing it.
hed_schema: str or HedSchema
hed_schema: HedSchema
If str, will attempt to load as a version if it doesn't have a valid extension.
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
Expand Down
16 changes: 12 additions & 4 deletions hed/tools/analysis/analysis_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
from hed.tools.util.data_util import separate_values
from hed.models.hed_tag import HedTag
from hed.models.hed_group import HedGroup
from hed.models.df_util import get_assembled, expand_defs


def assemble_hed(data_input, columns_included=None, expand_defs=False):
def assemble_hed(data_input, sidecar, schema, columns_included=None, expand_defs=False):
""" Return assembled HED annotations in a dataframe.

Parameters:
data_input (TabularInput): The tabular input file whose HED annotations are to be assembled.
sidecar (Sidecar): Sidecar with definitions.
schema (HedSchema): Hed schema
columns_included (list or None): A list of additional column names to include.
If None, only the list of assembled tags is included.
expand_defs (bool): If True, definitions are expanded when the events are assembled.
Expand All @@ -23,14 +26,19 @@ def assemble_hed(data_input, columns_included=None, expand_defs=False):
"""

eligible_columns, missing_columns = separate_values(list(data_input.dataframe.columns), columns_included)
hed_obj_list = get_assembled_strings(data_input, expand_defs=expand_defs)
hed_string_list = [str(hed) for hed in hed_obj_list]
hed_string_list = data_input.series_a
definitions = sidecar.get_def_dict(hed_schema=schema)
if expand_defs:
expand_defs(hed_string_list, schema, definitions, columns=None)
# hed_obj_list, defs = get_assembled(data_input, sidecar, schema, extra_def_dicts=None, join_columns=True,
# shrink_defs=False, expand_defs=True)
# hed_string_list = [str(hed) for hed in hed_obj_list]
if not eligible_columns:
df = pd.DataFrame({"HED_assembled": hed_string_list})
else:
df = data_input.dataframe[eligible_columns].copy(deep=True)
df['HED_assembled'] = hed_string_list
definitions = data_input.get_definitions().gathered_defs
# definitions = data_input.get_definitions().gathered_defs
return df, definitions


Expand Down
2 changes: 1 addition & 1 deletion hed/tools/analysis/hed_context_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(self, hed_strings, hed_schema):

"""

self.hed_strings = [HedString(str(hed), hed_schema=hed_schema) for hed in hed_strings]
self.hed_strings = hed_strings
if not isinstance(hed_schema, HedSchema) and not isinstance(hed_schema, HedSchemaGroup):
raise ValueError("ContextRequiresSchema", f"Context manager must have a valid HedSchema of HedSchemaGroup")
self.hed_schema = hed_schema
Expand Down
10 changes: 6 additions & 4 deletions hed/tools/analysis/hed_type_definitions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" Manages definitions associated with a type such as condition-variable. """

from hed.models.hed_tag import HedTag
from hed.models.def_mapper import DefMapper
from hed.models.definition_dict import DefinitionDict


class HedTypeDefinitions:
Expand All @@ -10,16 +10,18 @@ def __init__(self, definitions, hed_schema, type_tag='condition-variable'):
""" Create a definition manager for a type of variable.

Parameters:
definitions (dict or DefMapper): A dictionary of DefinitionEntry objects.
definitions (dict or DefinitionDict): A dictionary of DefinitionEntry objects.
hed_schema (HedSchema or HedSchemaGroup): The schema used for parsing.
type_tag (str): Lower-case HED tag string representing the type managed.

# TODO: [Refactor] - should dict be allowed for definitions?

"""

self.type_tag = type_tag.lower()
self.hed_schema = hed_schema
if isinstance(definitions, DefMapper):
self.definitions = definitions.gathered_defs
if isinstance(definitions, DefinitionDict):
self.definitions = definitions.defs
elif isinstance(definitions, dict):
self.definitions = definitions
else:
Expand Down
70 changes: 70 additions & 0 deletions hed/tools/remodeling/operations/convert_columns_op.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
""" Convert the type of the specified columns of a tabular file. """

from hed.tools.remodeling.operations.base_op import BaseOp


class ConvertColumnsOp(BaseOp):
    """ Convert the type of the specified columns of a tabular file.

    Required remodeling parameters:
        - **column_names** (*list*): The list of columns to convert.
        - **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)

    Optional remodeling parameters:
        - **decimal_places** (*int*): Number decimal places to keep (for fixed only).

    Notes:
        - The do_op method currently returns an unmodified copy of the input DataFrame.

    """

    PARAMS = {
        "operation": "convert_columns",
        "required_parameters": {
            "column_names": list,
            "convert_to": str
        },
        "optional_parameters": {
            "decimal_places": int
        }
    }

    def __init__(self, parameters):
        """ Constructor for the convert columns operation.

        Parameters:
            parameters (dict): Parameter values for required and optional parameters.

        Raises:
            KeyError
                - If a required parameter is missing (checked by the base class constructor).
                - If an unexpected parameter is provided (checked by the base class constructor).

            TypeError
                - If a parameter has the wrong type (checked by the base class constructor).

            ValueError
                - If convert_to is not one of the allowed values.

        """
        # The base class receives PARAMS and the supplied values before any attribute is read.
        super().__init__(self.PARAMS, parameters)
        self.column_names = parameters['column_names']
        self.convert_to = parameters['convert_to']
        self.decimal_places = parameters.get('decimal_places')
        self.allowed_types = ['str', 'int', 'float', 'fixed']
        if self.convert_to in self.allowed_types:
            return
        message = f"The convert_to value {self.convert_to} must be one of {str(self.allowed_types)}"
        raise ValueError("CannotConvertToSpecifiedType", message)

    def do_op(self, dispatcher, df, name, sidecar=None):
        """ Return a copy of the DataFrame (column conversion is not applied here yet).

        Parameters:
            dispatcher (Dispatcher): Manages the operation I/O (not used by this method).
            df (DataFrame): The DataFrame to be remodeled.
            name (str): Unique identifier for the dataframe -- often the original file path (not used).
            sidecar (Sidecar or file-like): Only needed for HED operations (not used).

        Returns:
            DataFrame: A copy of the input DataFrame; the input itself is left untouched.

        """

        return df.copy()
2 changes: 2 additions & 0 deletions hed/tools/remodeling/operations/valid_operations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" The valid operations for the remodeling tools. """

# from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp
from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp
from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp
from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp
Expand All @@ -20,6 +21,7 @@
from hed.tools.remodeling.operations.summarize_hed_validation_op import SummarizeHedValidationOp

valid_operations = {
# 'convert_columns': ConvertColumnsOp,
'factor_column': FactorColumnOp,
'factor_hed_tags': FactorHedTagsOp,
'factor_hed_type': FactorHedTypeOp,
Expand Down
13 changes: 7 additions & 6 deletions tests/tools/analysis/test_analysis_util_assemble_hed.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ def setUpClass(cls):

hed_schema = hedschema.load_schema(schema_path)
cls.hed_schema = hed_schema
sidecar1 = Sidecar(json_path, name='face_sub1_json', hed_schema=hed_schema)
sidecar1 = Sidecar(json_path, name='face_sub1_json')
cls.sidecar_path = sidecar1
cls.input_data = TabularInput(events_path, hed_schema=hed_schema, sidecar=sidecar1, name="face_sub1_events")
cls.sidecar1 = sidecar1
cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
cls.input_data_no_sidecar = TabularInput(events_path, name="face_sub1_events_no_sidecar")

def test_assemble_hed_included_no_expand(self):
df1, dict1 = assemble_hed(self.input_data,
df1, dict1 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema,
columns_included=["onset", "duration", "event_type"], expand_defs=False)
self.assertIsInstance(df1, DataFrame, "hed_assemble should return a dataframe when columns are included")
columns1 = list(df1.columns)
Expand All @@ -38,11 +39,11 @@ def test_assemble_hed_included_no_expand(self):
self.assertNotEqual(first_str1.find('Def/'), -1, "assemble_hed with no def expand has Def tags")
self.assertEqual(first_str1.find('Def-expand'), -1,
"assemble_hed with no def expand does not have Def-expand tags")
self.assertIsInstance(dict1, dict, "hed_assemble returns a dictionary of definitions")
self.assertEqual(len(dict1), 17, "hed_assemble definition dictionary has the right number of elements.")
self.assertIsInstance(dict1.defs, dict, "hed_assemble returns a dictionary of definitions")
self.assertEqual(len(dict1.defs), 17, "hed_assemble definition dictionary has the right number of elements.")

def test_assemble_hed_included_expand(self):
df2, dict2 = assemble_hed(self.input_data,
df2, dict2 = assemble_hed(self.input_data, self.sidecar1, self.hed_schema,
columns_included=["onset", "duration", "event_type"], expand_defs=True)
first_str2 = df2.iloc[0]['HED_assembled']
self.assertEqual(first_str2.find('Def/'), -1, "assemble_hed with def expand has no Def tag")
Expand Down
16 changes: 9 additions & 7 deletions tests/tools/analysis/test_hed_context_manager.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import os
import unittest
from hed.errors.exceptions import HedFileError
from hed.models.hed_group import HedGroup
from hed.models.hed_string import HedString
from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput
from hed.schema.hed_schema_io import load_schema_version
from hed.tools.analysis.hed_context_manager import HedContextManager, OnsetGroup
from hed.tools.analysis.analysis_util import get_assembled_strings
from hed.tools.analysis.hed_context_manager import HedContextManager
from hed.models.df_util import get_assembled


class Test(unittest.TestCase):
Expand Down Expand Up @@ -37,7 +36,8 @@ def setUpClass(cls):
'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
sidecar_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json'))
sidecar1 = Sidecar(sidecar_path, name='face_sub1_json')
cls.input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events")
cls.input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
cls.sidecar1 = sidecar1
cls.schema = schema

# def test_onset_group(self):
Expand Down Expand Up @@ -71,13 +71,14 @@ def test_constructor(self):
self.assertIsInstance(context, list, "The constructor event contexts should be a list")
self.assertIsInstance(context[1], HedString, "The constructor event contexts has a correct element")

def test_constructor(self):
def test_constructor1(self):
with self.assertRaises(ValueError) as cont:
HedContextManager(self.test_strings1, None)
self.assertEqual(cont.exception.args[0], "ContextRequiresSchema")

def test_iter(self):
hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False)
hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None,
join_columns=True, shrink_defs=True, expand_defs=False)
manager1 = HedContextManager(hed_strings, self.schema)
i = 0
for hed, context in manager1.iter_context():
Expand All @@ -86,7 +87,8 @@ def test_iter(self):
i = i + 1

def test_constructor_from_assembled(self):
hed_strings = get_assembled_strings(self.input_data, hed_schema=self.schema, expand_defs=False)
hed_strings, _ = get_assembled(self.input_data, self.sidecar1, self.schema, extra_def_dicts=None,
join_columns=True, shrink_defs=True, expand_defs=False)
manager1 = HedContextManager(hed_strings, self.schema)
self.assertEqual(len(manager1.hed_strings), 200,
"The constructor for assembled strings has expected # of strings")
Expand Down
2 changes: 1 addition & 1 deletion tests/tools/analysis/test_hed_tag_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def setUpClass(cls):
schema = hedschema.load_schema(schema_path)
cls.hed_schema = schema
sidecar1 = Sidecar(json_path, name='face_sub1_json')
input_data = TabularInput(events_path, sidecar=sidecar1, hed_schema=schema, name="face_sub1_events")
input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
input_df, def_dict = assemble_hed(input_data, expand_defs=False)
cls.input_df = input_df
cls.def_dict = def_dict
Expand Down
50 changes: 50 additions & 0 deletions tests/tools/remodeling/operations/test_convert_columns_op.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import pandas as pd
import numpy as np
import unittest
from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp
from hed.tools.remodeling.dispatcher import Dispatcher


class Test(unittest.TestCase):
    """ Unit tests for ConvertColumnsOp.

    TODO: Add tests of do_op using the sample data -- only constructor validation is covered so far.

    """

    @classmethod
    def setUpClass(cls):
        # Shared fixtures: rows of an events-style table and the matching column names.
        cls.sample_data = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female'],
                           [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female'],
                           [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female'],
                           [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female'],
                           [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male'],
                           [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male']]
        # Same rows with two extra trailing 0/1 columns (not referenced by the current tests).
        cls.factored = [[0.0776, 0.5083, 'go', 'n/a', 0.565, 'correct', 'right', 'female', 0, 0],
                        [5.5774, 0.5083, 'unsuccesful_stop', 0.2, 0.49, 'correct', 'right', 'female', 0, 1],
                        [9.5856, 0.5084, 'go', 'n/a', 0.45, 'correct', 'right', 'female', 0, 0],
                        [13.5939, 0.5083, 'succesful_stop', 0.2, 'n/a', 'n/a', 'n/a', 'female', 1, 0],
                        [17.1021, 0.5083, 'unsuccesful_stop', 0.25, 0.633, 'correct', 'left', 'male', 0, 1],
                        [21.6103, 0.5083, 'go', 'n/a', 0.443, 'correct', 'left', 'male', 0, 0]]
        cls.sample_columns = ['onset', 'duration', 'trial_type', 'stop_signal_delay', 'response_time',
                              'response_accuracy', 'response_hand', 'sex']
        cls.default_factor_columns = ["trial_type.succesful_stop", "trial_type.unsuccesful_stop"]

    def setUp(self):
        # A fresh parameter dictionary for every test, so a test may modify it freely.
        self.base_parameters = {
            "column_names": ["onset", "duration", "response_time"],
            "convert_to": "int"
        }

    @classmethod
    def tearDownClass(cls):
        pass

    def test_constructor_bad_convert_to(self):
        """ The constructor rejects a convert_to value outside the allowed types. """
        self.base_parameters["convert_to"] = "blech"
        with self.assertRaises(ValueError) as raised:
            ConvertColumnsOp(self.base_parameters)
        # Checked after the with block: the first exception argument is the error code.
        self.assertEqual(raised.exception.args[0], "CannotConvertToSpecifiedType")


if __name__ == '__main__':
    unittest.main()