diff --git a/hed/tools/analysis/hed_tag_counts.py b/hed/tools/analysis/hed_tag_counts.py
index 845e448b5..300319820 100644
--- a/hed/tools/analysis/hed_tag_counts.py
+++ b/hed/tools/analysis/hed_tag_counts.py
@@ -21,11 +21,11 @@ def __init__(self, hed_tag, file_name):
         self.set_value(hed_tag)
 
     def set_value(self, hed_tag):
-        """ Update the tag term value counts for a HedTag. 
-        
+        """ Update the tag term value counts for a HedTag.
+
         Parameters:
-            hed_tag (HedTag or None): Item to use to update the value counts. 
-        
+            hed_tag (HedTag or None): Item to use to update the value counts.
+
         """
         if not hed_tag:
             return
@@ -43,13 +43,13 @@ def get_info(self, verbose=False):
         else:
             files = len(self.files)
         return {'tag': self.tag, 'events': self.events, 'files': files}
-    
+
     def get_summary(self):
         """ Return a dictionary summary of the events and files for this tag.
-        
+
         Returns:
             dict: dictionary summary of events and files that contain this tag.
-        
+
         """
         return {'tag': self.tag, 'events': self.events, 'files': [name for name in self.files]}
 
@@ -63,12 +63,11 @@ def get_empty(self):
 
 class HedTagCounts:
     """ Counts of HED tags for a tabular file.
-    
+
     Parameters:
         name (str): An identifier for these counts (usually the filename of the tabular file)
         total_events (int): The total number of events in the tabular file.
 
-    
     """
 
     def __init__(self, name, total_events=0):
@@ -76,15 +75,15 @@ def __init__(self, name, total_events=0):
         self.name = name
         self.files = {}
         self.total_events = total_events
-        
+
     def update_event_counts(self, hed_string_obj, file_name, definitions=None):
-        """ Update the tag counts based on a hed string object. 
-        
+        """ Update the tag counts based on a hed string object.
+
         Parameters:
             hed_string_obj (HedString): The HED string whose tags should be counted.
             file_name (str): The name of the file corresponding to these counts.
             definitions (dict): The definitions associated with the HED string.
-        
+
         """
         if file_name not in self.files:
             self.files[file_name] = ""
@@ -100,17 +99,20 @@ def update_event_counts(self, hed_string_obj, file_name, definitions=None):
         self.merge_tag_dicts(tag_dict)
 
     def organize_tags(self, tag_template):
+        """ Organize tags into categories as specified by the tag_template.
+
+        Parameters:
+            tag_template (dict): A dictionary whose keys are titles and values are lists of HED tags (str).
+
+        Returns:
+            dict - keys are tags (strings) and values are list of HedTagCount for items fitting template.
+            list - of HedTagCount objects corresponding to tags that don't fit the template.
+
+        """
         template = self.create_template(tag_template)
         unmatched = []
-        for key, tag_count in self.tag_dict.items():
-            matched = False
-            for tag in reversed(tag_count.tag_terms):
-                if tag in template:
-                    template[tag].append(tag_count)
-                    matched = True
-                    break
-            if not matched:
-                unmatched.append(tag_count)
+        for tag_count in self.tag_dict.values():
+            self._update_template(tag_count, template, unmatched)
         return template, unmatched
 
     def merge_tag_dicts(self, other_dict):
@@ -118,20 +120,22 @@ def merge_tag_dicts(self, other_dict):
             if tag not in self.tag_dict:
                 self.tag_dict[tag] = count.get_empty()
             self.tag_dict[tag].events = self.tag_dict[tag].events + count.events
-            value_dict = self.tag_dict[tag].value_dict
-            for value, val_count in count.value_dict.items():
-                if value in value_dict:
-                    value_dict[value] = value_dict[value] + val_count
-                else:
-                    value_dict[value] = val_count
             for file in count.files:
                 self.tag_dict[tag].files[file] = ''
+            # Always merge the incoming value counts: the destination entry may have just been
+            # created empty, so its emptiness must not be used as a reason to skip the merge.
+            value_dict = self.tag_dict[tag].value_dict
+            for value, val_count in count.value_dict.items():
+                if value in value_dict:
+                    value_dict[value] = value_dict[value] + val_count
+                else:
+                    value_dict[value] = val_count
 
     def get_summary(self):
         details = {}
         for tag, count in self.tag_dict.items():
             details[tag] = count.get_summary()
-        return {'name': str(self.name), 'type_tag': self.type_tag, 'files': list(self.files.keys()),
+        return {'name': str(self.name), 'files': list(self.files.keys()),
                 'total_events': self.total_events, 'details': details}
 
     @staticmethod
@@ -141,3 +145,20 @@ def create_template(tags):
             for element in key_list:
                 template_dict[element.lower()] = []
         return template_dict
+
+    @staticmethod
+    def _update_template(tag_count, template, unmatched):
+        """ Update the template or unmatched with info in the tag_count.
+
+        Parameters:
+            tag_count (HedTagCount): Information for a particular tag.
+            template (dict): Maps template keys to lists of HedTagCount; the first matching list is appended to.
+            unmatched (list): Receives tag_count when none of its tag terms is a key of template.
+
+        """
+        tag_list = reversed(list(tag_count.tag_terms))
+        for tkey in tag_list:
+            if tkey in template:
+                template[tkey].append(tag_count)
+                return
+        unmatched.append(tag_count)
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
index 2379dc4a5..f88650ccb 100644
--- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
@@ -35,6 +35,7 @@ class SummarizeHedTagsOp(BaseOp):
         },
         "optional_parameters": {
             "append_timecode": bool,
+            "expand_definitions": bool,
             "expand_context": bool
         }
     }
diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py
index 0950ea909..5f2eebc27 100644
--- a/tests/tools/analysis/test_hed_tag_counts.py
+++ b/tests/tools/analysis/test_hed_tag_counts.py
@@ -1,10 +1,10 @@
 import os
 import unittest
-from pandas import DataFrame
 from hed import schema as hedschema
-from hed.models import Sidecar, TabularInput, HedString, HedTag
+from hed.models import Sidecar, TabularInput, HedString
+from hed.models.df_util import get_assembled
 from hed.tools import assemble_hed
-from hed.tools.analysis.hed_tag_counts import HedTagCount, HedTagCounts
+from hed.tools.analysis.hed_tag_counts import HedTagCounts
 
 
 # noinspection PyBroadException
@@ -13,7 +13,7 @@ class Test(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
-                                                    '../../data/bids_tests/eeg_ds003645s_hed'))
+                                                       '../../data/bids_tests/eeg_ds003645s_hed'))
         schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                                     '../../data/schema_tests/HED8.0.0.xml'))
         cls.bids_root_path = bids_root_path
@@ -25,9 +25,20 @@ def setUpClass(cls):
         cls.hed_schema = schema
         sidecar1 = Sidecar(json_path, name='face_sub1_json')
         input_data = TabularInput(events_path, sidecar=sidecar1, name="face_sub1_events")
+        cls.input_data = input_data
+        cls.sidecar1 = sidecar1
         input_df, def_dict = assemble_hed(input_data, sidecar1, schema, expand_defs=False)
         cls.input_df = input_df
         cls.def_dict = def_dict
+        cls.tag_template = {
+            "Sensory events": ["Sensory-event", "Sensory-presentation", "Sensory-attribute",
+                               "Experimental-stimulus", "Task-stimulus-role",
+                               "Task-attentional-demand", "Incidental", "Instructional", "Warning"],
+            "Agent actions": ["Agent-action", "Agent", "Action", "Agent-task-role",
+                              "Task-action-type", "Participant-response"],
+            "Objects": ["Item"],
+            "Other events": ["Event", "Task-event-role", "Mishap"]
+        }
 
     def test_constructor(self):
         counts = HedTagCounts('Base_name')
@@ -58,10 +69,24 @@ def test_merge_tag_dicts(self):
     def test_hed_tag_count(self):
         name = 'Base_name1'
         counts1 = HedTagCounts(name, 0)
-        counts1.update_event_counts(HedString(self.input_df.iloc[0]['HED_assembled'], self.hed_schema), 
+        counts1.update_event_counts(HedString(self.input_df.iloc[0]['HED_assembled'], self.hed_schema),
                                     file_name=name)
         self.assertIsInstance(counts1, HedTagCounts)
 
+    def test_organize_tags(self):
+        counts = HedTagCounts('Base_name')
+        hed_strings, definitions = get_assembled(self.input_data, self.sidecar1, self.hed_schema,
+                                                 extra_def_dicts=None, join_columns=True,
+                                                 shrink_defs=False, expand_defs=True)
+        # definitions = input_data.get_definitions().gathered_defs
+        for hed in hed_strings:
+            counts.update_event_counts(hed, 'run-1')
+        self.assertIsInstance(counts.tag_dict, dict)
+        self.assertEqual(len(counts.tag_dict), 47)
+        org_tags, leftovers = counts.organize_tags(self.tag_template)
+        self.assertEqual(len(org_tags), 19)
+        self.assertEqual(len(leftovers), 22)
+
 
 if __name__ == '__main__':
     unittest.main()