Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions hed/tools/analysis/annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import re

import pandas as pd
from pandas import DataFrame, Series
from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput

from hed.errors.exceptions import HedFileError
from hed.models import df_util
from hed.tools.bids.bids_dataset import BidsDataset
Expand Down Expand Up @@ -63,21 +65,6 @@ def df_to_hed(dataframe, description_tag=True):
return hed_dict


def series_to_factor(series):
    """ Map each entry of a series to 0 or 1 based on its truthiness.

    Parameters:
        series (Series): The pandas Series whose entries are to be converted.

    Returns:
        list: One int per entry; 'n/a', None, NaN, and other falsy entries (such as '') give 0, all others give 1.
    """
    # The 'n/a' marker and missing values are turned into False before the boolean cast.
    without_missing = series.replace('n/a', False).fillna(False)
    flags = without_missing.astype(bool).tolist()
    return list(map(int, flags))


def extract_tags(hed_string, search_tag):
""" Extract all instances of specified tag from a tag_string.

Expand Down Expand Up @@ -129,6 +116,7 @@ def generate_sidecar_entry(column_name, column_values=None):
sidecar_entry["HED"] = hed
return sidecar_entry


def get_bids_dataset(data_root):
""" Return a BIDS dataset object given a path to a dataset root.

Expand All @@ -141,6 +129,7 @@ def get_bids_dataset(data_root):
"""
return BidsDataset(data_root)


def hed_to_df(sidecar_dict, col_names=None):
""" Return a 4-column dataframe of HED portions of sidecar.

Expand Down Expand Up @@ -202,6 +191,21 @@ def merge_hed_dict(sidecar_dict, hed_dict):
sidecar_dict[key]['Levels'] = value_dict['Levels']


def series_to_factor(series):
    """ Map each entry of a series to 0 or 1 based on its truthiness.

    Parameters:
        series (Series): The pandas Series whose entries are to be converted.

    Returns:
        list: One int per entry; 'n/a', None, NaN, and other falsy entries (such as '') give 0, all others give 1.
    """
    # The 'n/a' marker and missing values are turned into False before the boolean cast.
    without_missing = series.replace('n/a', False).fillna(False)
    flags = without_missing.astype(bool).tolist()
    return list(map(int, flags))


def str_to_tabular(tsv_str, sidecar=None):
""" Return a TabularInput a tsv string.

Expand All @@ -223,9 +227,11 @@ def strs_to_sidecar(sidecar_strings):
sidecar_strings (string or list): String or strings representing sidecars.

Returns:
Sidecar: the merged sidecar from the list.
Sidecar or None: the merged sidecar from the list.
"""

if not sidecar_strings:
return None
if not isinstance(sidecar_strings, list):
sidecar_strings = [sidecar_strings]
if sidecar_strings:
Expand All @@ -236,6 +242,7 @@ def strs_to_sidecar(sidecar_strings):
else:
return None


def to_factor(data, column=None):
"""Convert data to an integer factor list.

Expand All @@ -246,11 +253,11 @@ def to_factor(data, column=None):
Returns:
list - contains 0's and 1's, empty, 'n/a' and np.NAN are converted to 0.
"""
if isinstance(data, pd.Series):
if isinstance(data, Series):
series = data
elif isinstance(data, pd.DataFrame) and column:
elif isinstance(data, DataFrame) and column:
series = data[column]
elif isinstance(data, pd.DataFrame):
elif isinstance(data, DataFrame):
series = data.iloc[:, 0]
else:
raise HedFileError("CannotConvertToFactor",
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/visualization/tag_word_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400
kwargs.setdefault('min_font_size', 8)
if 'font_path' not in kwargs:
kwargs['font_path'] = None
elif kwargs['font_path'] and not kwargs['font_path'].lower().endswith((".ttf", ".otf")):
elif kwargs['font_path'] and not kwargs['font_path'].lower().endswith((".ttf", ".otf", ".ttc")):
raise HedFileError("InvalidFontPath", f"Font {kwargs['font_path']} not valid on this system", "")

wc = WordCloud(background_color=background_color, mask=mask_image,
Expand Down
53 changes: 37 additions & 16 deletions tests/tools/analysis/test_annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from hed.models.sidecar import Sidecar
from hed.models.hed_string import HedString
from hed.models.tabular_input import TabularInput

from hed.tools.analysis import annotation_util
from hed.tools.analysis.tabular_summary import TabularSummary
from hed.tools.util import io_util
Expand Down Expand Up @@ -196,17 +197,6 @@ def test_generate_sidecar_entry(self):
self.assertIsInstance(entry2['HED'], str,
"generate_sidecar_entry HED entry should be str when no column values")

def test_series_to_factor(self):
    # Check that series_to_factor returns one 0/1 entry per element of the series.

    # Every value is a nonzero number, so every factor entry should be 1.
    series1 = Series([1.0, 2.0, 3.0, 4.0])
    factor1 = annotation_util.series_to_factor(series1)
    self.assertEqual(len(series1), len(factor1))
    self.assertEqual(sum(factor1), len(factor1))

    # Only 'a' should count; '', None, NaN, and 'n/a' should all give 0.
    # np.nan is used because the np.NAN alias was removed in NumPy 2.0.
    series2 = Series(['a', '', None, np.nan, 'n/a'])
    factor2 = annotation_util.series_to_factor(series2)
    self.assertEqual(len(series2), len(factor2))
    self.assertEqual(sum(factor2), 1)


def test_generate_sidecar_entry_non_letters(self):
entry1 = annotation_util.generate_sidecar_entry('my !#$-123_10',
column_values=['apple 1', '@banana', 'grape%cherry&'])
Expand Down Expand Up @@ -294,7 +284,7 @@ def test_merge_hed_dict_full(self):
skip_columns = ["onset", "duration", "sample", "trial", "response_time"]
value_columns = ["rep_lag", "stim_file", "value"]
event_files = io_util.get_file_list(self.bids_root_path, extensions=[".tsv"], name_suffix="_events",
exclude_dirs=exclude_dirs)
exclude_dirs=exclude_dirs)
value_sum = TabularSummary(value_cols=value_columns, skip_cols=skip_columns)
value_sum.update(event_files)
sidecar_template = value_sum.extract_sidecar_template()
Expand All @@ -305,6 +295,37 @@ def test_merge_hed_dict_full(self):
annotation_util.merge_hed_dict(example_sidecar, spreadsheet_sidecar)
self.assertEqual(6, len(example_sidecar), 'merge_hed_dict merges with the correct length')

def test_to_factor(self):
    # Check to_factor on a Series, on a DataFrame with and without a column name, and on an unsupported type.

    # Every value is a nonzero number, so every factor entry should be 1.
    series1 = Series([1.0, 2.0, 3.0, 4.0])
    factor1 = annotation_util.to_factor(series1)
    self.assertEqual(len(series1), len(factor1))
    self.assertEqual(sum(factor1), len(factor1))

    # Only 'a' should count; '', None, NaN, and 'n/a' should all give 0.
    # np.nan is used because the np.NAN and np.NaN aliases were removed in NumPy 2.0.
    series2 = Series(['a', '', None, np.nan, 'n/a'])
    factor2 = annotation_util.to_factor(series2)
    self.assertEqual(len(series2), len(factor2))
    self.assertEqual(sum(factor2), 1)

    data = {
        'Name': ['Alice', '', 'n/a', 1.0],  # Mixed str/float column with an empty string and an 'n/a'
        'Age': [25, np.nan, 35, 0]
    }
    df = DataFrame(data)

    # In the 'Name' column only 'Alice' and 1.0 should count.
    factor3 = annotation_util.to_factor(df, column='Name')
    self.assertEqual(sum(factor3), 2)

    # With no column given, the same count is expected as for 'Name', the first column.
    factor4 = annotation_util.to_factor(df)
    self.assertEqual(sum(factor4), 2)

    # A plain dict is not a supported input type.
    with self.assertRaises(HedFileError):
        annotation_util.to_factor(data)

def test_series_to_factor(self):
    # Check that series_to_factor returns one 0/1 entry per element of the series.

    # Every value is a nonzero number, so every factor entry should be 1.
    series1 = Series([1.0, 2.0, 3.0, 4.0])
    factor1 = annotation_util.series_to_factor(series1)
    self.assertEqual(len(series1), len(factor1))
    self.assertEqual(sum(factor1), len(factor1))

    # Only 'a' should count; '', None, NaN, and 'n/a' should all give 0.
    # np.nan is used because the np.NAN alias was removed in NumPy 2.0.
    series2 = Series(['a', '', None, np.nan, 'n/a'])
    factor2 = annotation_util.series_to_factor(series2)
    self.assertEqual(len(series2), len(factor2))
    self.assertEqual(sum(factor2), 1)

def test_strs_to_sidecar(self):
with open(self.json_path, 'r') as fp:
sidecar_dict = json.load(fp)
Expand Down Expand Up @@ -336,7 +357,6 @@ def test_to_strlist(self):
self.assertEqual(str_list2[0], 'Red,Sensory-event')
self.assertEqual(str_list2[2], '')


def test_flatten_cat_col(self):
col1 = self.sidecar2c["a"]
col2 = self.sidecar2c["b"]
Expand All @@ -358,13 +378,14 @@ def test_flatten_cat_col(self):
"_flatten_cat_col should use the Description tag if available")

def test_flatten_cat_col_only_description(self):
keys, values, descriptions, tags = annotation_util._flatten_cat_col("event_type",
{"HED": {"code1": "Description/Code 1 here."}})
keys, values, descriptions, tags = \
annotation_util._flatten_cat_col("event_type", {"HED": {"code1": "Description/Code 1 here."}})
self.assertIsInstance(tags, list)
self.assertEqual(tags[0], 'n/a')

def test_flatten_val_col_only_description(self):
keys, values, descriptions, tags = annotation_util._flatten_val_col("response", {"HED": "Description/Code 1 here."})
keys, values, descriptions, tags = annotation_util._flatten_val_col("response",
{"HED": "Description/Code 1 here."})
self.assertEqual(descriptions[0], 'Code 1 here.')
self.assertFalse(tags[0])

Expand Down