# hed/tools/visualizations/tag_summary_word_cloud.py
#
# Requires the third-party package wordcloud>=1.9.2, which this change adds to
# both requirements.txt and docs/requirements.txt.


def create_wordcloud(word_dict, width=400, height=200, background_color='white'):
    """Takes a word dict and returns a generated word cloud object

    Parameters:
        word_dict(dict): words and their frequencies
        width(int): width in pixels
        height(int): height in pixels
        background_color(str): background color passed to WordCloud
    Returns:
        word_cloud(WordCloud): The generated cloud.
                               Use .to_file to save it out as an image.

    :raises ValueError:
        An empty dictionary was passed
    """
    # Enforce the documented contract here instead of relying on the
    # third-party library to reject empty input.
    if not word_dict:
        raise ValueError("Cannot create a word cloud from an empty word dictionary")

    # Imported at call time so that convert_summary_to_word_dict can be used
    # without loading the word cloud rendering package.
    from wordcloud import WordCloud

    wc = WordCloud(background_color=background_color, width=width, height=height)
    wc.generate_from_frequencies(word_dict)
    return wc


def convert_summary_to_word_dict(summary_json):
    """Converts a HedTagSummary json dict into the word cloud input format

    Every entry found under summary_json['Dataset']['Overall summary']['Main tags']
    contributes one item: its 'tag' value mapped to its 'events' value.  If the
    same tag occurs more than once, the last occurrence wins.

    Parameters:
        summary_json(dict): The summary from a summarize hed tags op

    Returns:
        word_dict(dict): a dict of the words and their occurrence count

    :raises KeyError:
        A malformed dictionary was passed
    """
    tag_dict = summary_json['Dataset']['Overall summary']['Main tags']
    return {
        tag_entry['tag']: tag_entry['events']
        for tag_sub_list in tag_dict.values()
        for tag_entry in tag_sub_list
    }
# tests/tools/visualizations/test_tag_summary_word_cloud.py
import unittest

from wordcloud import WordCloud

from hed.tools.visualizations import tag_summary_word_cloud


class TestWordCloudFunctions(unittest.TestCase):
    """Tests for the tag summary word cloud helper functions."""

    def test_convert_summary_to_word_dict(self):
        # Assume we have a valid summary_json
        summary_json = {
            'Dataset': {
                'Overall summary': {
                    'Main tags': {
                        'tag_category_1': [
                            {'tag': 'tag1', 'events': 5},
                            {'tag': 'tag2', 'events': 3}
                        ],
                        'tag_category_2': [
                            {'tag': 'tag3', 'events': 7}
                        ]
                    }
                }
            }
        }
        expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7}

        word_dict = tag_summary_word_cloud.convert_summary_to_word_dict(summary_json)
        self.assertEqual(word_dict, expected_output)

    def test_convert_summary_to_word_dict_malformed(self):
        # A summary lacking the expected nesting must raise KeyError,
        # as documented by convert_summary_to_word_dict
        with self.assertRaises(KeyError):
            tag_summary_word_cloud.convert_summary_to_word_dict({})

    def test_create_wordcloud(self):
        word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7}
        width = 400
        height = 200
        wc = tag_summary_word_cloud.create_wordcloud(word_dict, width, height)

        self.assertIsInstance(wc, WordCloud)
        self.assertEqual(wc.width, width)
        self.assertEqual(wc.height, height)

    def test_create_wordcloud_with_empty_dict(self):
        # Test creation of word cloud with an empty dictionary
        word_dict = {}
        with self.assertRaises(ValueError):
            tag_summary_word_cloud.create_wordcloud(word_dict)

    def test_create_wordcloud_with_single_word(self):
        # Test creation of word cloud with a single word
        word_dict = {'single_word': 1}
        wc = tag_summary_word_cloud.create_wordcloud(word_dict)
        self.assertIsInstance(wc, WordCloud)
        # Check that the single word is in the word cloud
        self.assertIn('single_word', wc.words_)


if __name__ == '__main__':
    unittest.main()