hed-standard · IanCa · Jun 30, 2023 · Jun 29, 2023 · Jun 29, 2023 · Jun 30, 2023
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -25,7 +25,7 @@ jobs:
       - uses: actions/cache@v3
         with:
           path: ${{ env.pythonLocation }}
-          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
+          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}
 
       - name: Install dependencies
         run: |
@@ -85,7 +85,7 @@ jobs:
       - uses: actions/cache@v3
         with:
           path: ${{ env.pythonLocation }}
-          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
+          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}
 
       - name: Install dependencies
         run: |

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,11 +1,11 @@
 defusedxml>=0.7.1
-inflect>=6.0.2
-myst-parser>=0.18.1
+inflect==6.0.2
 numpy>=1.21.6
 openpyxl>=3.1.0
 pandas>=1.3.5
 portalocker>=2.7.0
+pydantic<2  # For compatibility with inflect
 semantic_version>=2.10.0
 Sphinx>=5.2.2
 sphinx_rtd_theme>=1.0.0
-wordcloud>=1.9.2
+wordcloud==1.9.2
diff --git a/hed/tools/visualization/__init__.py b/hed/tools/visualization/__init__.py
@@ -0,0 +1 @@
+from .tag_word_cloud import create_wordcloud, summary_to_dict
diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py
@@ -1,46 +1,112 @@
-from wordcloud import WordCloud
+import numpy as np
+from PIL import Image
+from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud
 
 
-def create_wordcloud(word_dict, width=400, height=200):
+def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=200, **kwargs):
     """Takes a word dict and returns a generated word cloud object
 
     Parameters:
         word_dict(dict): words and their frequencies
+        mask_path(str or None): The path of the mask file
+        background_color(str or None): If None, transparent background.
         width(int): width in pixels
         height(int): height in pixels
+        kwargs(kwargs): Any other parameters WordCloud accepts, overrides default values where relevant.
     Returns:
         word_cloud(WordCloud): The generated cloud.
                                Use .to_file to save it out as an image.
 
     :raises ValueError:
         An empty dictionary was passed
     """
-    wc = WordCloud(background_color='white', width=width, height=height)
+    mask_image = None
+    if mask_path:
+        mask_image = load_and_resize_mask(mask_path, width, height)
+        width = mask_image.shape[1]
+        height = mask_image.shape[0]
+    kwargs.setdefault('contour_width', 3)
+    kwargs.setdefault('contour_color', 'black')
+    kwargs.setdefault('prefer_horizontal', 0.75)
+    kwargs.setdefault('default_color_func', default_color_func)
+    kwargs.setdefault('relative_scaling', 1)
+    kwargs.setdefault('max_font_size', height / 15)
+    kwargs.setdefault('min_font_size', 5)
+
+    wc = WordCloud(background_color=background_color, mask=mask_image,
+                   width=width, height=height, mode="RGBA", **kwargs)
 
     wc.generate_from_frequencies(word_dict)
 
     return wc
 
 
-def summary_to_dict(summary):
+def summary_to_dict(summary, transform=np.log10, adjustment=5):
     """Converts a HedTagSummary json dict into the word cloud input format
 
     Parameters:
         summary(dict): The summary from a summarize hed tags op
-
+        transform(func or None): The function to transform the number of found tags.
+                         Default log10. If None, the counts are used untransformed.
+        adjustment(int): Value added after transform.
     Returns:
         word_dict(dict): a dict of the words and their occurrence count
 
     :raises KeyError:
         A malformed dictionary was passed
 
     """
+    if transform is None:
+        transform = lambda x: x
     overall_summary = summary.get("Overall summary", {})
     specifics = overall_summary.get("Specifics", {})
     tag_dict = specifics.get("Main tags", {})
     word_dict = {}
     for tag_sub_list in tag_dict.values():
         for tag_sub_dict in tag_sub_list:
-            word_dict[tag_sub_dict['tag']] = tag_sub_dict['events']
+            word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment
 
     return word_dict
+
+
+def load_and_resize_mask(mask_path, width=None, height=None):
+    """ Load a mask image and resize it according to given dimensions.
+
+        The image is resized maintaining aspect ratio if only width or height is provided.
+
+        Returns None if no mask_path.
+
+    Parameters:
+        mask_path (str): The path to the mask image file.
+        width (int, optional): The desired width of the resized image. If only width is provided,
+            the image is scaled to maintain its original aspect ratio. Defaults to None.
+        height (int, optional): The desired height of the resized image. If only height is provided,
+            the image is scaled to maintain its original aspect ratio. Defaults to None.
+
+    Returns:
+        numpy.ndarray: The mask as a uint8 numpy array; values are binarized to 0 or 255 only when a resize is requested.
+    """
+    if mask_path:
+        mask_image = Image.open(mask_path)
+
+        if width or height:
+            original_size = np.array((mask_image.width, mask_image.height))
+            output_size = np.array((width, height))
+            # Handle one missing param
+            if not height:
+                scale = original_size[0] / width
+                output_size = original_size / scale
+            elif not width:
+                scale = original_size[1] / height
+                output_size = original_size / scale
+
+            mask_image = mask_image.resize(output_size.astype(int), Image.LANCZOS)
+
+            # Convert to greyscale then to binary black and white (0 or 255)
+            mask_image = mask_image.convert('L')
+            mask_image_array = np.array(mask_image)
+            mask_image_array = np.where(mask_image_array > 127, 255, 0)
+        else:
+            mask_image_array = np.array(mask_image)
+
+        return mask_image_array.astype(np.uint8)
diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py
@@ -0,0 +1,86 @@
+import random
+from random import Random
+
+import numpy as np
+from PIL import Image, ImageFilter
+from matplotlib import cm
+from wordcloud import WordCloud
+
+
+def _draw_contour(wc, img):
+    """Slightly tweaked copy of internal WordCloud function to allow transparency"""
+    if wc.mask is None or wc.contour_width == 0 or wc.contour_color is None:
+        return img
+
+    mask = wc._get_bolean_mask(wc.mask) * 255
+    contour = Image.fromarray(mask.astype(np.uint8))
+    contour = contour.resize(img.size)
+    contour = contour.filter(ImageFilter.FIND_EDGES)
+    contour = np.array(contour)
+
+    # make sure borders are not drawn before changing width
+    contour[[0, -1], :] = 0
+    contour[:, [0, -1]] = 0
+
+    # use gaussian to change width, divide by 10 to give more resolution
+    radius = wc.contour_width / 10
+    contour = Image.fromarray(contour)
+    contour = contour.filter(ImageFilter.GaussianBlur(radius=radius))
+    contour = np.array(contour) > 0
+    if img.mode == 'RGBA':
+        contour = np.dstack((contour, contour, contour, contour))
+    else:
+        contour = np.dstack((contour, contour, contour))
+
+    # color the contour
+    ret = np.array(img) * np.invert(contour)
+    color = np.array(Image.new(img.mode, img.size, wc.contour_color))
+    ret += color * contour
+
+    return Image.fromarray(ret)
+
+# Replace WordCloud function with one that can handle transparency
+WordCloud._draw_contour = _draw_contour
+
+
+def random_color_darker(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None):
+    """Random color generation func"""
+    if random_state is None:
+        random_state = Random()
+    return f"hsl({random_state.randint(0, 255)}, {random_state.randint(50, 100)}%, {random_state.randint(0, 50)}%)"
+
+
+class ColormapColorFunc:
+    def __init__(self, colormap='nipy_spectral', color_range=(0.0, 0.5), color_step_range=(0.15, 0.25)):
+        """Initialize a word cloud color generator.
+
+        Parameters:
+            colormap (str, optional): The name of the matplotlib colormap to use for generating colors.
+                                      Defaults to 'nipy_spectral'.
+            color_range (tuple of float, optional): A tuple containing the minimum and maximum values to use
+                                                    from the colormap. Defaults to (0.0, 0.5).
+            color_step_range (tuple of float, optional): A tuple containing the minimum and maximum values to step
+                                                         through the colormap. Defaults to (0.15, 0.25).
+                                                         This is the speed at which it goes through the range chosen.
+                                                         .25 means it will go through 1/4 of the range each pick.
+        """
+        self.colormap = cm.get_cmap(colormap)
+        self.color_range = color_range
+        self.color_step_range = color_step_range
+        self.current_fraction = random.uniform(0, 1)  # Start at a random point
+
+    def color_func(self, word, font_size, position, orientation, random_state=None, **kwargs):
+        # Update the current color fraction and wrap around if necessary
+        color_step = random.uniform(*self.color_step_range)
+        self.current_fraction = (self.current_fraction + color_step) % 1.0
+
+        # Scale the fraction to the desired range
+        scaled_fraction = self.color_range[0] + (self.current_fraction * (self.color_range[1] - self.color_range[0]))
+
+        # Get the color from the colormap
+        color = self.colormap(scaled_fraction)
+
+        return tuple(int(c * 255) for c in color[:3])  # Convert to RGB format
+
+
+default_color_func = ColormapColorFunc().color_func
diff --git a/requirements.txt b/requirements.txt
@@ -1,8 +1,9 @@
 defusedxml>=0.7.1
-inflect>=6.0.2
+inflect==6.0.2
 numpy>=1.21.6
 openpyxl>=3.1.0
 pandas>=1.3.5
 portalocker>=2.7.0
+pydantic<2  # For compatibility with inflect
 semantic_version>=2.10.0
-wordcloud>=1.9.2
+wordcloud==1.9.2
diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py
@@ -0,0 +1,110 @@
+import unittest
+from wordcloud import WordCloud
+from hed.tools.visualization import tag_word_cloud
+from hed.tools.visualization.tag_word_cloud import load_and_resize_mask
+import numpy as np
+from PIL import Image, ImageDraw
+import os
+
+
+class TestWordCloudFunctions(unittest.TestCase):
+    def test_convert_summary_to_word_dict(self):
+        # Assume we have a valid summary_json
+        summary_json = {
+            'Overall summary': {
+                'Specifics': {
+                    'Main tags': {
+                        'tag_category_1': [
+                            {'tag': 'tag1', 'events': 5},
+                            {'tag': 'tag2', 'events': 3}
+                        ],
+                        'tag_category_2': [
+                            {'tag': 'tag3', 'events': 7}
+                        ]
+                    }
+                }
+            }
+        }
+        expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7}
+
+        word_dict = tag_word_cloud.summary_to_dict(summary_json, transform=None, adjustment=0)
+        self.assertEqual(word_dict, expected_output)
+
+    def test_create_wordcloud(self):
+        word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7}
+        width = 400
+        height = 200
+        wc = tag_word_cloud.create_wordcloud(word_dict, width=width, height=height)
+
+        self.assertIsInstance(wc, WordCloud)
+        self.assertEqual(wc.width, width)
+        self.assertEqual(wc.height, height)
+
+    def test_create_wordcloud_with_empty_dict(self):
+        # Test creation of word cloud with an empty dictionary
+        word_dict = {}
+        with self.assertRaises(ValueError):
+            tag_word_cloud.create_wordcloud(word_dict)
+
+    def test_create_wordcloud_with_single_word(self):
+        # Test creation of word cloud with a single word
+        word_dict = {'single_word': 1}
+        wc = tag_word_cloud.create_wordcloud(word_dict)
+        self.assertIsInstance(wc, WordCloud)
+        # Check that the single word is in the word cloud
+        self.assertIn('single_word', wc.words_)
+
+
+class TestLoadAndResizeMask(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Create a simple black and white image
+        cls.original_size = (300, 200)
+        cls.img = Image.new('L', cls.original_size, 0) # Start with a black image
+
+        # Draw a white circle in the middle of the image
+        d = ImageDraw.Draw(cls.img)
+        circle_radius = min(cls.original_size) // 4 # Radius of the circle is a quarter of the smaller dimension of the image
+        circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) # Center of the circle is the center of the image
+        d.ellipse((circle_center[0] - circle_radius,
+                   circle_center[1] - circle_radius,
+                   circle_center[0] + circle_radius,
+                   circle_center[1] + circle_radius),
+                  fill=255) # Fill the ellipse with white
+        cls.img_path = 'temp_img.bmp'
+        cls.img.save(cls.img_path)
+
+    @classmethod
+    def tearDownClass(cls):
+        # Clean up the temp image
+        os.remove(cls.img_path)
+
+    def test_no_resizing(self):
+        mask = load_and_resize_mask(self.img_path)
+        mask_img = Image.fromarray(mask)
+        self.assertEqual((mask_img.width, mask_img.height), self.original_size)
+
+    def test_width_resizing(self):
+        width = 150
+        mask = load_and_resize_mask(self.img_path, width=width)
+        mask_img = Image.fromarray(mask)
+        expected_width, expected_height = width, int(self.original_size[1] * width / self.original_size[0])
+        self.assertEqual((mask_img.width, mask_img.height), (expected_width, expected_height))
+
+    def test_height_resizing(self):
+        height = 100
+        mask = load_and_resize_mask(self.img_path, height=height)
+        mask_img = Image.fromarray(mask)
+        expected_shape = (int(self.original_size[0] * height / self.original_size[1]), height)
+        self.assertEqual((mask_img.width, mask_img.height), expected_shape)
+
+    def test_both_dimensions_resizing(self):
+        width, height = 100, 75
+        mask = load_and_resize_mask(self.img_path, width=width, height=height)
+        self.assertEqual(mask.shape, (height, width))
+
+    def test_mask_color(self):
+        mask = load_and_resize_mask(self.img_path)
+        # Since we created an 'L' mode image using only black (0) and white (255), all values should be either 0 or 255
+        unique_values = np.unique(mask)
+        self.assertCountEqual(unique_values, [0, 255])
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .tag_word_cloud import create_wordcloud, summary_to_dict