From eca3e252dd9bf3514d9c33cf168b97aff3dd93ac Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 29 Jun 2023 17:46:05 -0500
Subject: [PATCH 1/3] Second pass word cloud

---
 docs/requirements.txt                         |   2 +-
 hed/tools/visualization/__init__.py           |   1 +
 hed/tools/visualization/tag_word_cloud.py     |  78 ++++++++++++-
 hed/tools/visualization/word_cloud_util.py    |  86 ++++++++++++++
 requirements.txt                              |   2 +-
 .../visualization/test_tag_word_cloud.py      | 110 ++++++++++++++++++
 .../visualizations/test_tag_word_cloud.py     |  52 ---------
 7 files changed, 271 insertions(+), 60 deletions(-)
 create mode 100644 hed/tools/visualization/__init__.py
 create mode 100644 hed/tools/visualization/word_cloud_util.py
 create mode 100644 tests/tools/visualization/test_tag_word_cloud.py
 delete mode 100644 tests/tools/visualizations/test_tag_word_cloud.py

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 9783a3079..b5ee9d833 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -8,4 +8,4 @@ portalocker>=2.7.0
 semantic_version>=2.10.0
 Sphinx>=5.2.2
 sphinx_rtd_theme>=1.0.0
-wordcloud>=1.9.2
+wordcloud==1.9.2
diff --git a/hed/tools/visualization/__init__.py b/hed/tools/visualization/__init__.py
new file mode 100644
index 000000000..677aba8ac
--- /dev/null
+++ b/hed/tools/visualization/__init__.py
@@ -0,0 +1 @@
+from tag_word_cloud import create_wordcloud, summary_to_dict
diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py
index 2f2c25236..4e352f387 100644
--- a/hed/tools/visualization/tag_word_cloud.py
+++ b/hed/tools/visualization/tag_word_cloud.py
@@ -1,13 +1,18 @@
-from wordcloud import WordCloud
+import numpy as np
+from PIL import Image
+from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud
 
 
-def create_wordcloud(word_dict, width=400, height=200):
+def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=200, **kwargs):
     """Takes a word dict and returns a generated word cloud object
 
     Parameters:
         word_dict(dict): words and their frequencies
+        mask_path(str or None): The path of the mask file
+        background_color(str or None): If None, transparent background.
         width(int): width in pixels
         height(int): height in pixels
+        kwargs(kwargs): Any other parameters WordCloud accepts, overrides default values where relevant.
     Returns:
         word_cloud(WordCloud): The generated cloud.
                                Use .to_file to save it out as an image.
@@ -15,19 +20,35 @@ def create_wordcloud(word_dict, width=400, height=200):
     :raises ValueError:
         An empty dictionary was passed
     """
-    wc = WordCloud(background_color='white', width=width, height=height)
+    mask_image = None
+    if mask_path:
+        mask_image = load_and_resize_mask(mask_path, width, height)
+        width = mask_image.shape[1]
+        height = mask_image.shape[0]
+    kwargs.setdefault('contour_width', 3)
+    kwargs.setdefault('contour_color', 'black')
+    kwargs.setdefault('prefer_horizontal', 0.75)
+    kwargs.setdefault('default_color_func', default_color_func)
+    kwargs.setdefault('relative_scaling', 1)
+    kwargs.setdefault('max_font_size', height / 15)
+    kwargs.setdefault('min_font_size', 5)
+
+    wc = WordCloud(background_color=background_color, mask=mask_image,
+                   width=width, height=height, mode="RGBA", **kwargs)
 
     wc.generate_from_frequencies(word_dict)
 
     return wc
 
 
-def summary_to_dict(summary):
+def summary_to_dict(summary, transform=np.log10, adjustment=5):
     """Converts a HedTagSummary json dict into the word cloud input format
 
     Parameters:
         summary(dict): The summary from a summarize hed tags op
-
+        transform(func): The function to transform the number of found tags
+                         Default log10
+        adjustment(int): Value added after transform.
     Returns:
         word_dict(dict): a dict of the words and their occurrence count
         
@@ -35,12 +56,57 @@ def summary_to_dict(summary):
         A malformed dictionary was passed
         
     """
+    if transform is None:
+        transform = lambda x: x
     overall_summary = summary.get("Overall summary", {})
     specifics = overall_summary.get("Specifics", {})
     tag_dict = specifics.get("Main tags", {})
     word_dict = {}
     for tag_sub_list in tag_dict.values():
         for tag_sub_dict in tag_sub_list:
-            word_dict[tag_sub_dict['tag']] = tag_sub_dict['events']
+            word_dict[tag_sub_dict['tag']] = transform(tag_sub_dict['events']) + adjustment
 
     return word_dict
+
+
+def load_and_resize_mask(mask_path, width=None, height=None):
+    """ Load a mask image, optionally resize it, and convert it to a binary mask.
+
+        If only one of width or height is provided, the image is resized maintaining aspect ratio.
+        If both are provided, the image is resized to exactly (width, height).
+        The result is binarized whether or not the image was resized.
+
+    Parameters:
+        mask_path (str): The path to the mask image file.
+        width (int, optional): The desired width of the resized image. If only width is provided,
+            the image is scaled to maintain its original aspect ratio. Defaults to None.
+        height (int, optional): The desired height of the resized image. If only height is provided,
+            the image is scaled to maintain its original aspect ratio. Defaults to None.
+
+    Returns:
+        numpy.ndarray or None: 2D uint8 array holding only 0 and 255, or None if mask_path is empty.
+    """
+    if not mask_path:
+        return None
+
+    # The context manager closes the image file once its pixels have been read.
+    with Image.open(mask_path) as source_image:
+        mask_image = source_image
+        if width or height:
+            original_size = np.array((source_image.width, source_image.height))
+            if width and height:
+                output_size = np.array((width, height))
+            elif width:
+                # Only width given: derive height from the original aspect ratio.
+                output_size = original_size / (original_size[0] / width)
+            else:
+                # Only height given: derive width from the original aspect ratio.
+                output_size = original_size / (original_size[1] / height)
+            # resize() takes a plain (width, height) tuple of ints.
+            mask_image = source_image.resize(tuple(int(v) for v in output_size), Image.LANCZOS)
+
+        # Convert to greyscale, then threshold to pure black and white (0 or 255).
+        # Done on every path so the documented 0/255 contract holds without resizing too.
+        mask_image_array = np.array(mask_image.convert('L'))
+
+    return np.where(mask_image_array > 127, 255, 0).astype(np.uint8)
\ No newline at end of file
diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py
new file mode 100644
index 000000000..ba25e0133
--- /dev/null
+++ b/hed/tools/visualization/word_cloud_util.py
@@ -0,0 +1,86 @@
+import random
+from random import Random
+
+import numpy as np
+from PIL import Image, ImageFilter
+from matplotlib import cm
+from wordcloud import WordCloud
+
+
+def _draw_contour(wc, img):
+    """Slightly tweaked copy of WordCloud's internal _draw_contour that also handles RGBA (transparent) images."""
+    if wc.mask is None or wc.contour_width == 0 or wc.contour_color is None:
+        return img
+
+    mask = wc._get_bolean_mask(wc.mask) * 255  # "bolean" is wordcloud's own spelling of this private method
+    contour = Image.fromarray(mask.astype(np.uint8))
+    contour = contour.resize(img.size)
+    contour = contour.filter(ImageFilter.FIND_EDGES)
+    contour = np.array(contour)
+
+    # make sure borders are not drawn before changing width
+    contour[[0, -1], :] = 0
+    contour[:, [0, -1]] = 0
+
+    # use gaussian to change width, divide by 10 to give more resolution
+    radius = wc.contour_width / 10
+    contour = Image.fromarray(contour)
+    contour = contour.filter(ImageFilter.GaussianBlur(radius=radius))
+    contour = np.array(contour) > 0
+    if img.mode == 'RGBA':
+        contour = np.dstack((contour, contour, contour, contour))
+    else:
+        contour = np.dstack((contour, contour, contour))
+
+    # color the contour: blank out the contour pixels in the image, then add the contour color there
+    ret = np.array(img) * np.invert(contour)
+    color = np.array(Image.new(img.mode, img.size, wc.contour_color))
+    ret += color * contour
+
+    return Image.fromarray(ret)
+
+# Replace WordCloud function with one that can handle transparency
+WordCloud._draw_contour = _draw_contour
+
+
+def random_color_darker(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None):
+    """Return a random 'hsl(h, s%, l%)' color string with saturation 50-100% and lightness 0-50%."""
+    if random_state is None:
+        random_state = Random()
+    return f"hsl({random_state.randint(0, 255)}, {random_state.randint(50, 100)}%, {random_state.randint(0, 50)}%)"
+
+
+class ColormapColorFunc:
+    """Stateful color picker that walks through a slice of a matplotlib colormap, one random step per call."""
+    def __init__(self, colormap='nipy_spectral', color_range=(0.0, 0.5), color_step_range=(0.15, 0.25)):
+        """Initialize a word cloud color generator.
+
+        Parameters:
+            colormap (str, optional): The name of the matplotlib colormap to use for generating colors.
+                                      Defaults to 'nipy_spectral'.
+            color_range (tuple of float, optional): A tuple containing the minimum and maximum values to use
+                                                    from the colormap. Defaults to (0.0, 0.5).
+            color_step_range (tuple of float, optional): A tuple containing the minimum and maximum values to step
+                                                         through the colormap. Defaults to (0.15, 0.25).
+                                                         This is the speed at which it goes through the range chosen.
+                                                         .25 means it will go through 1/4 of the range each pick.
+        """
+        self.colormap = cm.get_cmap(colormap)
+        self.color_range = color_range
+        self.color_step_range = color_step_range
+        self.current_fraction = random.uniform(0, 1)  # Start at a random point
+
+    def color_func(self, word, font_size, position, orientation, random_state=None, **kwargs):
+        """Return the next (r, g, b) color; steps are drawn from random_state when WordCloud supplies one."""
+        rng = random if random_state is None else random_state
+        self.current_fraction = (self.current_fraction + rng.uniform(*self.color_step_range)) % 1.0
+
+        # Scale the fraction to the desired range
+        scaled_fraction = self.color_range[0] + (self.current_fraction * (self.color_range[1] - self.color_range[0]))
+
+        # Get the color from the colormap
+        color = self.colormap(scaled_fraction)
+        return tuple(int(c * 255) for c in color[:3])  # Convert to RGB format
+
+
+default_color_func = ColormapColorFunc().color_func
diff --git a/requirements.txt b/requirements.txt
index 7dd623faa..96a737c45 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,4 @@ openpyxl>=3.1.0
 pandas>=1.3.5
 portalocker>=2.7.0
 semantic_version>=2.10.0
-wordcloud>=1.9.2
+wordcloud==1.9.2
diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py
new file mode 100644
index 000000000..2b515c941
--- /dev/null
+++ b/tests/tools/visualization/test_tag_word_cloud.py
@@ -0,0 +1,110 @@
+import unittest
+from wordcloud import WordCloud
+from hed.tools.visualization import tag_word_cloud
+from hed.tools.visualization.tag_word_cloud import load_and_resize_mask
+import numpy as np
+from PIL import Image, ImageDraw
+import os
+
+
+class TestWordCloudFunctions(unittest.TestCase):
+    def test_convert_summary_to_word_dict(self):
+        # Assume we have a valid summary_json
+        summary_json = {
+            'Overall summary': {
+                'Specifics': {
+                    'Main tags': {
+                        'tag_category_1': [
+                            {'tag': 'tag1', 'events': 5},
+                            {'tag': 'tag2', 'events': 3}
+                        ],
+                        'tag_category_2': [
+                            {'tag': 'tag3', 'events': 7}
+                        ]
+                    }
+                }
+            }
+        }
+        expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7}
+
+        word_dict = tag_word_cloud.summary_to_dict(summary_json, transform=None, adjustment=0)
+        self.assertEqual(word_dict, expected_output)
+
+    def test_create_wordcloud(self):
+        word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7}
+        width = 400
+        height = 200
+        wc = tag_word_cloud.create_wordcloud(word_dict, width=width, height=height)
+
+        self.assertIsInstance(wc, WordCloud)
+        self.assertEqual(wc.width, width)
+        self.assertEqual(wc.height, height)
+
+    def test_create_wordcloud_with_empty_dict(self):
+        # Test creation of word cloud with an empty dictionary
+        word_dict = {}
+        with self.assertRaises(ValueError):
+            tag_word_cloud.create_wordcloud(word_dict)
+
+    def test_create_wordcloud_with_single_word(self):
+        # Test creation of word cloud with a single word
+        word_dict = {'single_word': 1}
+        wc = tag_word_cloud.create_wordcloud(word_dict)
+        self.assertIsInstance(wc, WordCloud)
+        # Check that the single word is in the word cloud
+        self.assertIn('single_word', wc.words_)
+
+
+class TestLoadAndResizeMask(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Create a simple black and white image
+        cls.original_size = (300, 200)
+        cls.img = Image.new('L', cls.original_size, 0) # Start with a black image
+
+        # Draw a white circle in the middle of the image
+        d = ImageDraw.Draw(cls.img)
+        circle_radius = min(cls.original_size) // 4 # Radius of the circle is a quarter of the smaller dimension of the image
+        circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2) # Center of the circle is the center of the image
+        d.ellipse((circle_center[0] - circle_radius,
+                   circle_center[1] - circle_radius,
+                   circle_center[0] + circle_radius,
+                   circle_center[1] + circle_radius),
+                  fill=255) # Fill the ellipse with white
+        cls.img_path = 'temp_img.bmp'
+        cls.img.save(cls.img_path)
+
+    @classmethod
+    def tearDownClass(cls):
+        # Clean up the temp image
+        os.remove(cls.img_path)
+
+    def test_no_resizing(self):
+        mask = load_and_resize_mask(self.img_path)
+        mask_img = Image.fromarray(mask)
+        self.assertEqual((mask_img.width, mask_img.height), self.original_size)
+
+    def test_width_resizing(self):
+        width = 150
+        mask = load_and_resize_mask(self.img_path, width=width)
+        mask_img = Image.fromarray(mask)
+        expected_width, expected_height = width, int(self.original_size[1] * width / self.original_size[0])
+        self.assertEqual((mask_img.width, mask_img.height), (expected_width, expected_height))
+
+    def test_height_resizing(self):
+        height = 100
+        mask = load_and_resize_mask(self.img_path, height=height)
+        mask_img = Image.fromarray(mask)
+        expected_shape = (int(self.original_size[0] * height / self.original_size[1]), height)
+        self.assertEqual((mask_img.width, mask_img.height), expected_shape)
+
+    def test_both_dimensions_resizing(self):
+        width, height = 100, 75
+        mask = load_and_resize_mask(self.img_path, width=width, height=height)
+        self.assertEqual(mask.shape, (height, width))
+
+    def test_mask_color(self):
+        mask = load_and_resize_mask(self.img_path)
+        # The source image is mode 'L' and drawn with only 0 and 255, so the mask should contain just those values
+        unique_values = np.unique(mask)
+        self.assertCountEqual(unique_values, [0, 255])
diff --git a/tests/tools/visualizations/test_tag_word_cloud.py b/tests/tools/visualizations/test_tag_word_cloud.py
deleted file mode 100644
index 3b0878f79..000000000
--- a/tests/tools/visualizations/test_tag_word_cloud.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import unittest
-from wordcloud import WordCloud
-from hed.tools.visualization import tag_word_cloud
-
-
-class TestWordCloudFunctions(unittest.TestCase):
-
-    def test_convert_summary_to_word_dict(self):
-        # Assume we have a valid summary_json
-        summary_json = {
-            'Dataset': {
-                'Overall summary': {
-                    'Main tags': {
-                        'tag_category_1': [
-                            {'tag': 'tag1', 'events': 5},
-                            {'tag': 'tag2', 'events': 3}
-                        ],
-                        'tag_category_2': [
-                            {'tag': 'tag3', 'events': 7}
-                        ]
-                    }
-                }
-            }
-        }
-        expected_output = {'tag1': 5, 'tag2': 3, 'tag3': 7}
-
-        word_dict = tag_word_cloud.summary_to_dict(summary_json)
-        self.assertEqual(word_dict, expected_output)
-
-    def test_create_wordcloud(self):
-        word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7}
-        width = 400
-        height = 200
-        wc = tag_word_cloud.create_wordcloud(word_dict, width, height)
-
-        self.assertIsInstance(wc, WordCloud)
-        self.assertEqual(wc.width, width)
-        self.assertEqual(wc.height, height)
-
-    def test_create_wordcloud_with_empty_dict(self):
-        # Test creation of word cloud with an empty dictionary
-        word_dict = {}
-        with self.assertRaises(ValueError):
-            tag_word_cloud.create_wordcloud(word_dict)
-
-    def test_create_wordcloud_with_single_word(self):
-        # Test creation of word cloud with a single word
-        word_dict = {'single_word': 1}
-        wc = tag_word_cloud.create_wordcloud(word_dict)
-        self.assertIsInstance(wc, WordCloud)
-        # Check that the single word is in the word cloud
-        self.assertIn('single_word', wc.words_)

From a47a3361eb78b2e905cbc023b4450a07306da46a Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 29 Jun 2023 17:58:11 -0500
Subject: [PATCH 2/3] Fix reference in init

---
 hed/tools/visualization/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hed/tools/visualization/__init__.py b/hed/tools/visualization/__init__.py
index 677aba8ac..a40c0333b 100644
--- a/hed/tools/visualization/__init__.py
+++ b/hed/tools/visualization/__init__.py
@@ -1 +1 @@
-from tag_word_cloud import create_wordcloud, summary_to_dict
+from .tag_word_cloud import create_wordcloud, summary_to_dict

From 2cde33ff507a7db3134506ca6270123638271af0 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Fri, 30 Jun 2023 16:06:10 -0500
Subject: [PATCH 3/3] Fix for inflect

---
 .github/workflows/ci.yaml | 4 ++--
 docs/requirements.txt     | 4 ++--
 requirements.txt          | 5 +++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 120662503..0c7d2d647 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -25,7 +25,7 @@ jobs:
       - uses: actions/cache@v3
         with:
           path: ${{ env.pythonLocation }}
-          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
+          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}
 
       - name: Install dependencies
         run: |
@@ -85,7 +85,7 @@ jobs:
       - uses: actions/cache@v3
         with:
           path: ${{ env.pythonLocation }}
-          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
+          key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}
 
       - name: Install dependencies
         run: |
diff --git a/docs/requirements.txt b/docs/requirements.txt
index b5ee9d833..b3aa11ea9 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,10 +1,10 @@
 defusedxml>=0.7.1
-inflect>=6.0.2
-myst-parser>=0.18.1
+inflect==6.0.2
 numpy>=1.21.6
 openpyxl>=3.1.0
 pandas>=1.3.5
 portalocker>=2.7.0
+pydantic<2  # For compatibility with inflect
 semantic_version>=2.10.0
 Sphinx>=5.2.2
 sphinx_rtd_theme>=1.0.0
diff --git a/requirements.txt b/requirements.txt
index 96a737c45..f23f4bd4d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,9 @@
 defusedxml>=0.7.1
-inflect>=6.0.2
+inflect==6.0.2
 numpy>=1.21.6
 openpyxl>=3.1.0
 pandas>=1.3.5
 portalocker>=2.7.0
+pydantic<2  # For compatibility with inflect
 semantic_version>=2.10.0
-wordcloud==1.9.2
+wordcloud==1.9.2
\ No newline at end of file