Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 48 additions & 29 deletions hed/tools/analysis/hed_tag_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ def __init__(self, hed_tag, file_name):
self.set_value(hed_tag)

def set_value(self, hed_tag):
""" Update the tag term value counts for a HedTag.
""" Update the tag term value counts for a HedTag.

Parameters:
hed_tag (HedTag or None): Item to use to update the value counts.
hed_tag (HedTag or None): Item to use to update the value counts.

"""
if not hed_tag:
return
Expand All @@ -43,13 +43,13 @@ def get_info(self, verbose=False):
else:
files = len(self.files)
return {'tag': self.tag, 'events': self.events, 'files': files}

def get_summary(self):
    """ Return a dictionary summary of the events and files for this tag.

    Returns:
        dict: Summary with keys 'tag', 'events', and 'files', where 'files' is a
            list of the names obtained by iterating over self.files.

    """
    file_names = list(self.files)
    return {'tag': self.tag, 'events': self.events, 'files': file_names}

Expand All @@ -63,28 +63,27 @@ def get_empty(self):

class HedTagCounts:
""" Counts of HED tags for a tabular file.

Parameters:
name (str): An identifier for these counts (usually the filename of the tabular file)
total_events (int): The total number of events in the tabular file.


"""

def __init__(self, name, total_events=0):
self.tag_dict = {}
self.name = name
self.files = {}
self.total_events = total_events

def update_event_counts(self, hed_string_obj, file_name, definitions=None):
""" Update the tag counts based on a hed string object.
""" Update the tag counts based on a hed string object.

Parameters:
hed_string_obj (HedString): The HED string whose tags should be counted.
file_name (str): The name of the file corresponding to these counts.
definitions (dict): The definitions associated with the HED string.

"""
if file_name not in self.files:
self.files[file_name] = ""
Expand All @@ -100,38 +99,42 @@ def update_event_counts(self, hed_string_obj, file_name, definitions=None):
self.merge_tag_dicts(tag_dict)

def organize_tags(self, tag_template):
    """ Organize tags into categories as specified by the tag_template.

    Parameters:
        tag_template (dict): A dictionary whose keys are titles and values are lists of HED tags (str).

    Returns:
        dict: Keys are tags (str) and values are lists of HedTagCount for items fitting the template.
        list: HedTagCount objects corresponding to tags that don't fit the template.

    """
    template = self.create_template(tag_template)
    unmatched = []
    # Classify every tag count exactly once: _update_template appends it either to
    # one template entry or to unmatched.
    for tag_count in self.tag_dict.values():
        self._update_template(tag_count, template, unmatched)
    return template, unmatched

def merge_tag_dicts(self, other_dict):
    """ Merge another dictionary of tag counts into this object's tag_dict.

    Parameters:
        other_dict (dict): Keys are tags and values are tag-count objects (HedTagCount)
            whose events, value counts, and files are added to the entries of tag_dict.

    """
    for tag, count in other_dict.items():
        if tag not in self.tag_dict:
            # Start from the empty counterpart supplied by the incoming count so that
            # its events, values, and files are each added exactly once below.
            self.tag_dict[tag] = count.get_empty()
        merged = self.tag_dict[tag]
        merged.events = merged.events + count.events
        # Values are merged unconditionally, including into an entry whose
        # value_dict is still empty.
        value_dict = merged.value_dict
        for value, val_count in count.value_dict.items():
            if value in value_dict:
                value_dict[value] = value_dict[value] + val_count
            else:
                value_dict[value] = val_count
        for file in count.files:
            merged.files[file] = ''

def get_summary(self):
    """ Return a summary of these counts as a dictionary.

    Returns:
        dict: Has keys 'name', 'files', 'total_events', and 'details', where 'details'
            maps each tag to the dictionary returned by that tag count's get_summary.

    """
    details = {tag: count.get_summary() for tag, count in self.tag_dict.items()}
    return {'name': str(self.name), 'files': list(self.files.keys()),
            'total_events': self.total_events, 'details': details}

@staticmethod
Expand All @@ -141,3 +144,19 @@ def create_template(tags):
for element in key_list:
template_dict[element.lower()] = []
return template_dict

@staticmethod
def _update_template(tag_count, template, unmatched):
""" Update the template or unmatched with info in the tag_count.

Parameters:
tag_count (HedTagCount): Information for a particular tag.
template (dict): The

"""
tag_list = reversed(list(tag_count.tag_terms))
for tkey in tag_list:
if tkey in template.keys():
template[tkey].append(tag_count)
return
unmatched.append(tag_count)
1 change: 1 addition & 0 deletions hed/tools/remodeling/operations/summarize_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class SummarizeHedTagsOp(BaseOp):
},
"optional_parameters": {
"append_timecode": bool,
"expand_definitions": bool,
"expand_context": bool
}
}
Expand Down
35 changes: 30 additions & 5 deletions tests/tools/analysis/test_hed_tag_counts.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
import unittest
from pandas import DataFrame
from hed import schema as hedschema
from hed.models import Sidecar, TabularInput, HedString, HedTag
from hed.models import Sidecar, TabularInput, HedString
from hed.models.df_util import get_assembled
from hed.tools import assemble_hed
from hed.tools.analysis.hed_tag_counts import HedTagCount, HedTagCounts
from hed.tools.analysis.hed_tag_counts import HedTagCounts


# noinspection PyBroadException
Expand All @@ -13,7 +13,7 @@ class Test(unittest.TestCase):
@classmethod
def setUpClass(cls):
bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../data/bids_tests/eeg_ds003645s_hed'))
'../../data/bids_tests/eeg_ds003645s_hed'))
schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../data/schema_tests/HED8.0.0.xml'))
cls.bids_root_path = bids_root_path
Expand All @@ -25,9 +25,20 @@ def setUpClass(cls):
cls.hed_schema = schema
sidecar1 = Sidecar(json_path, name='face_sub1_json')
input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
cls.input_data = input_data
cls.sidecar1 = sidecar1
input_df, def_dict = assemble_hed(input_data, sidecar1, schema, expand_defs=False)
cls.input_df = input_df
cls.def_dict = def_dict
cls.tag_template = {
"Sensory events": ["Sensory-event", "Sensory-presentation", "Sensory-attribute",
"Experimental-stimulus", "Task-stimulus-role",
"Task-attentional-demand", "Incidental", "Instructional", "Warning"],
"Agent actions": ["Agent-action", "Agent", "Action", "Agent-task-role",
"Task-action-type", "Participant-response"],
"Objects": ["Item"],
"Other events": ["Event", "Task-event-role", "Mishap"]
}

def test_constructor(self):
counts = HedTagCounts('Base_name')
Expand Down Expand Up @@ -58,10 +69,24 @@ def test_merge_tag_dicts(self):
def test_hed_tag_count(self):
    """ Updating the counts from the first assembled HED string should run without error. """
    name = 'Base_name1'
    counts1 = HedTagCounts(name, 0)
    # Only the first row of the assembled events is counted here.
    counts1.update_event_counts(HedString(self.input_df.iloc[0]['HED_assembled'], self.hed_schema),
                                file_name=name)
    self.assertIsInstance(counts1, HedTagCounts)

def test_organize_tags(self):
    """ Check the number of tags counted and how organize_tags splits them using the class tag template. """
    tag_counts = HedTagCounts('Base_name')
    # The second item returned by get_assembled is not used by this test.
    assembled_strings, _ = get_assembled(self.input_data, self.sidecar1, self.hed_schema,
                                         extra_def_dicts=None, join_columns=True,
                                         shrink_defs=False, expand_defs=True)
    for hed_string in assembled_strings:
        tag_counts.update_event_counts(hed_string, 'run-1')
    self.assertIsInstance(tag_counts.tag_dict, dict)
    self.assertEqual(len(tag_counts.tag_dict), 47)
    organized, unmatched = tag_counts.organize_tags(self.tag_template)
    self.assertEqual(len(organized), 19)
    self.assertEqual(len(unmatched), 22)


if __name__ == '__main__':
unittest.main()