diff --git a/hed/tools/visualization/__init__.py b/hed/tools/visualization/__init__.py index a40c0333b..389ba92f8 100644 --- a/hed/tools/visualization/__init__.py +++ b/hed/tools/visualization/__init__.py @@ -1 +1 @@ -from .tag_word_cloud import create_wordcloud, summary_to_dict +from .tag_word_cloud import create_wordcloud, summary_to_dict, word_cloud_to_svg diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index 68a3a257d..9f9092cba 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -1,9 +1,9 @@ import numpy as np from PIL import Image -from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud +from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud, generate_contour_svg -def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=200, **kwargs): +def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=None, **kwargs): """Takes a word dict and returns a generated word cloud object Parameters: @@ -25,6 +25,13 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 mask_image = load_and_resize_mask(mask_path, width, height) width = mask_image.shape[1] height = mask_image.shape[0] + if height is None: + if width is None: + width = 400 + height = width // 2 + if width is None: + width = height * 2 + kwargs.setdefault('contour_width', 3) kwargs.setdefault('contour_color', 'black') kwargs.setdefault('prefer_horizontal', 0.75) @@ -41,6 +48,20 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 return wc +def word_cloud_to_svg(wc): + """Takes word cloud and returns it as an SVG string. + + Parameters: + wc(WordCloud): the word cloud object + Returns: + svg_string(str): The svg for the word cloud + """ + svg_string = wc.to_svg() + svg_string = svg_string.replace("fill:", "fill:rgb") + svg_string = svg_string.replace("", generate_contour_svg(wc, wc.width, wc.height) + "") + return svg_string + + def summary_to_dict(summary, transform=np.log10, adjustment=5): """Converts a HedTagSummary json dict into the word cloud input format diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py index 6071a138a..490be199f 100644 --- a/hed/tools/visualization/word_cloud_util.py +++ b/hed/tools/visualization/word_cloud_util.py @@ -7,14 +7,31 @@ from wordcloud import WordCloud -def _draw_contour(wc, img): +def generate_contour_svg(wc, width, height): + """Generates an SVG contour mask based on a word cloud object and dimensions. + + Parameters: + wc (WordCloud): The word cloud object. + width (int): SVG image width in pixels. + height (int): SVG image height in pixels. + + Returns: + str: SVG point list for the contour mask, or empty string if not generated. + """ + contour = _get_contour_mask(wc, width, height) + if contour is None: + return "" + return _numpy_to_svg(contour) + + +def _get_contour_mask(wc, width, height): """Slightly tweaked copy of internal WorldCloud function to allow transparency""" if wc.mask is None or wc.contour_width == 0 or wc.contour_color is None: - return img + return None mask = wc._get_bolean_mask(wc.mask) * 255 contour = Image.fromarray(mask.astype(np.uint8)) - contour = contour.resize(img.size) + contour = contour.resize((width, height)) contour = contour.filter(ImageFilter.FIND_EDGES) contour = np.array(contour) @@ -22,6 +39,15 @@ def _draw_contour(wc, img): contour[[0, -1], :] = 0 contour[:, [0, -1]] = 0 + return contour + + +def _draw_contour(wc, img): + """Slightly tweaked copy of internal WorldCloud function to allow transparency""" + contour = _get_contour_mask(wc, img.width, img.height) + if contour is None: + return img + # use gaussian to change width, divide by 10 to give more resolution radius = wc.contour_width / 10 contour = Image.fromarray(contour) @@ -44,6 +70,15 @@ def _draw_contour(wc, img): WordCloud._draw_contour = _draw_contour +def _numpy_to_svg(contour): + svg_elements = [] + points = np.array(contour.nonzero()).T + for y, x in points: + svg_elements.append(f'') + + return '\n'.join(svg_elements) + + def random_color_darker(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None): """Random color generation func""" if random_state is None: diff --git a/tests/data/visualization/word_mask.png b/tests/data/visualization/word_mask.png new file mode 100644 index 000000000..e235d063e Binary files /dev/null and b/tests/data/visualization/word_mask.png differ diff --git a/tests/models/test_definition_dict.py b/tests/models/test_definition_dict.py index 357584cc1..5005f55c5 100644 --- a/tests/models/test_definition_dict.py +++ b/tests/models/test_definition_dict.py @@ -134,5 +134,15 @@ def test_expand_defs(self): hed_string.expand_defs() self.assertEqual(str(hed_string), expected_results[key]) + def test_altering_definition_contents(self): + def_dict = DefinitionDict("(Definition/DefName, (Event, Action))", self.hed_schema) + hed_string1 = HedString("Def/DefName", self.hed_schema, def_dict) + hed_string2 = HedString("Def/DefName", self.hed_schema, def_dict) + hed_string1.expand_defs() + hed_string2.expand_defs() + hed_string1.remove([hed_string1.get_all_tags()[2]]) + + self.assertNotEqual(hed_string1, hed_string2) + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py index 2b515c941..6bb940eec 100644 --- a/tests/tools/visualization/test_tag_word_cloud.py +++ b/tests/tools/visualization/test_tag_word_cloud.py @@ -2,12 +2,19 @@ from wordcloud import WordCloud from hed.tools.visualization import tag_word_cloud from hed.tools.visualization.tag_word_cloud import load_and_resize_mask +from hed.tools.visualization.word_cloud_util import generate_contour_svg + import numpy as np from PIL import Image, ImageDraw import os class TestWordCloudFunctions(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.mask_path = os.path.realpath(os.path.join(os.path.dirname(__file__), + '../../data/visualization/word_mask.png')) + def test_convert_summary_to_word_dict(self): # Assume we have a valid summary_json summary_json = { @@ -40,6 +47,30 @@ def test_create_wordcloud(self): self.assertEqual(wc.width, width) self.assertEqual(wc.height, height) + def test_create_wordcloud_default_params(self): + word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} + wc = tag_word_cloud.create_wordcloud(word_dict) + + self.assertIsInstance(wc, WordCloud) + self.assertEqual(wc.width, 400) + self.assertEqual(wc.height, 200) + + def test_mask_scaling(self): + word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} + wc = tag_word_cloud.create_wordcloud(word_dict, self.mask_path, width=300, height=300) + + self.assertIsInstance(wc, WordCloud) + self.assertEqual(wc.width, 300) + self.assertEqual(wc.height, 300) + + def test_mask_scaling2(self): + word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} + wc = tag_word_cloud.create_wordcloud(word_dict, self.mask_path, width=300, height=None) + + self.assertIsInstance(wc, WordCloud) + self.assertEqual(wc.width, 300) + self.assertLess(wc.height, 300) + def test_create_wordcloud_with_empty_dict(self): # Test creation of word cloud with an empty dictionary word_dict = {} @@ -54,6 +85,15 @@ def test_create_wordcloud_with_single_word(self): # Check that the single word is in the word cloud self.assertIn('single_word', wc.words_) + def test_valid_word_cloud(self): + word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} + wc = tag_word_cloud.create_wordcloud(word_dict, mask_path=self.mask_path, width=400, height=None) + svg_output = tag_word_cloud.word_cloud_to_svg(wc) + self.assertTrue(svg_output.startswith('