Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ portalocker>=2.7.0
semantic_version>=2.10.0
Sphinx>=5.2.2
sphinx_rtd_theme>=1.0.0
wordcloud>=1.9.2
43 changes: 43 additions & 0 deletions hed/tools/visualizations/tag_summary_word_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from wordcloud import WordCloud


def create_wordcloud(word_dict, width=400, height=200):
    """Build a word cloud object from a word-frequency mapping.

    Parameters:
        word_dict(dict): Maps each word to its frequency.
        width(int): Width of the cloud canvas in pixels.
        height(int): Height of the cloud canvas in pixels.

    Returns:
        WordCloud: The generated cloud.
                   Call its .to_file method to save it out as an image.

    :raises ValueError:
        An empty dictionary was passed
    """
    # The canvas always uses a white background; only the size is configurable.
    cloud_options = {
        'background_color': 'white',
        'width': width,
        'height': height,
    }
    cloud = WordCloud(**cloud_options)

    # Lay the words out on the canvas according to the supplied frequencies.
    cloud.generate_from_frequencies(word_dict)
    return cloud


def convert_summary_to_word_dict(summary_json):
    """Flatten a HedTagSummary json dict into the word cloud input format.

    The per-category lists found under
    summary_json['Dataset']['Overall summary']['Main tags'] are merged into
    a single mapping. If the same tag occurs more than once, the count
    from the last occurrence is kept.

    Parameters:
        summary_json(dict): The summary from a summarize hed tags op

    Returns:
        word_dict(dict): a dict of the words and their occurrence count

    :raises KeyError:
        A malformed dictionary was passed
    """
    categories = summary_json['Dataset']['Overall summary']['Main tags']

    # Walk every entry of every category as one flat stream.
    entries = (entry for group in categories.values() for entry in group)

    counts = {}
    for entry in entries:
        event_count = entry['events']
        counts[entry['tag']] = event_count
    return counts
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ openpyxl>=3.1.0
pandas>=1.3.5
portalocker>=2.7.0
semantic_version>=2.10.0
wordcloud>=1.9.2
51 changes: 51 additions & 0 deletions tests/tools/visualizations/test_tag_summary_word_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import unittest
from wordcloud import WordCloud
from hed.tools.visualizations import tag_summary_word_cloud

class TestWordCloudFunctions(unittest.TestCase):
    """Unit tests for the helpers in tag_summary_word_cloud."""

    def test_convert_summary_to_word_dict(self):
        """A well-formed summary flattens to a tag -> event count dict."""
        main_tags = {
            'tag_category_1': [
                {'tag': 'tag1', 'events': 5},
                {'tag': 'tag2', 'events': 3},
            ],
            'tag_category_2': [
                {'tag': 'tag3', 'events': 7},
            ],
        }
        summary_json = {'Dataset': {'Overall summary': {'Main tags': main_tags}}}

        result = tag_summary_word_cloud.convert_summary_to_word_dict(summary_json)

        self.assertEqual(result, {'tag1': 5, 'tag2': 3, 'tag3': 7})

    def test_create_wordcloud(self):
        """The returned cloud is a WordCloud with the requested dimensions."""
        frequencies = {'tag1': 5, 'tag2': 3, 'tag3': 7}
        size = (400, 200)

        cloud = tag_summary_word_cloud.create_wordcloud(frequencies, *size)

        self.assertIsInstance(cloud, WordCloud)
        self.assertEqual(cloud.width, size[0])
        self.assertEqual(cloud.height, size[1])

    def test_create_wordcloud_with_empty_dict(self):
        """An empty frequency dict is rejected with ValueError."""
        self.assertRaises(ValueError, tag_summary_word_cloud.create_wordcloud, {})

    def test_create_wordcloud_with_single_word(self):
        """A one-word frequency dict still yields a cloud containing that word."""
        cloud = tag_summary_word_cloud.create_wordcloud({'single_word': 1})

        self.assertIsInstance(cloud, WordCloud)
        # The only word supplied must be present in the generated cloud.
        self.assertIn('single_word', cloud.words_)