Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
- uses: actions/cache@v3
with:
path: ${{ env.pythonLocation }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}

- name: Install dependencies
run: |
Expand Down Expand Up @@ -85,7 +85,7 @@ jobs:
- uses: actions/cache@v3
with:
path: ${{ env.pythonLocation }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('dev-requirements.txt') }}
key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }}

- name: Install dependencies
run: |
Expand Down
6 changes: 3 additions & 3 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
defusedxml>=0.7.1
inflect>=6.0.2
myst-parser>=0.18.1
inflect==6.0.2
numpy>=1.21.6
openpyxl>=3.1.0
pandas>=1.3.5
portalocker>=2.7.0
pydantic<2 # For compatibility with inflect
semantic_version>=2.10.0
Sphinx>=5.2.2
sphinx_rtd_theme>=1.0.0
wordcloud>=1.9.2
wordcloud==1.9.2
1 change: 1 addition & 0 deletions hed/tools/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .tag_word_cloud import create_wordcloud, summary_to_dict
78 changes: 72 additions & 6 deletions hed/tools/visualization/tag_word_cloud.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,112 @@
from wordcloud import WordCloud
import numpy as np
from PIL import Image
from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud


def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=200, **kwargs):
    """Takes a word dict and returns a generated word cloud object

    Parameters:
        word_dict(dict): words and their frequencies
        mask_path(str or None): The path of the mask file
        background_color(str or None): If None, transparent background.
        width(int): width in pixels
        height(int): height in pixels
        kwargs(kwargs): Any other parameters WordCloud accepts, overrides default values where relevant.
    Returns:
        word_cloud(WordCloud): The generated cloud.
                               Use .to_file to save it out as an image.

    :raises ValueError:
        An empty dictionary was passed
    """
    mask_image = None
    if mask_path:
        mask_image = load_and_resize_mask(mask_path, width, height)
        # When a mask is used, the cloud takes its size from the (possibly rescaled) mask.
        width = mask_image.shape[1]
        height = mask_image.shape[0]

    kwargs.setdefault('contour_width', 3)
    kwargs.setdefault('contour_color', 'black')
    kwargs.setdefault('prefer_horizontal', 0.75)
    # WordCloud's keyword is ``color_func``.  It takes precedence over ``colormap``, so only
    # install the default colour function when the caller did not ask for a colormap.
    if 'colormap' not in kwargs:
        kwargs.setdefault('color_func', default_color_func)
    kwargs.setdefault('relative_scaling', 1)
    kwargs.setdefault('max_font_size', height / 15)
    kwargs.setdefault('min_font_size', 5)
    # RGBA is what makes a ``background_color`` of None render as transparent; keeping it a
    # default (rather than a fixed argument) lets callers override it without a TypeError.
    kwargs.setdefault('mode', 'RGBA')

    wc = WordCloud(background_color=background_color, mask=mask_image,
                   width=width, height=height, **kwargs)

    wc.generate_from_frequencies(word_dict)

    return wc


def summary_to_dict(summary, transform=np.log10, adjustment=5):
    """Converts a HedTagSummary json dict into the word cloud input format

    Parameters:
        summary(dict): The summary from a summarize hed tags op
        transform(func or None): The function to transform the number of found tags
                                 Default log10.  None leaves the counts untransformed.
        adjustment(int): Value added after transform.
    Returns:
        word_dict(dict): a dict of the words and their (transformed, adjusted) occurrence count

    :raises KeyError:
        A malformed dictionary was passed (a tag entry without 'tag' or 'events')

    """
    def identity(value):
        return value

    scale = identity if transform is None else transform

    # Missing levels fall back to empty dicts, so an incomplete summary yields an empty result.
    overall_summary = summary.get("Overall summary", {})
    specifics = overall_summary.get("Specifics", {})
    tag_dict = specifics.get("Main tags", {})

    word_dict = {}
    for tag_sub_list in tag_dict.values():
        for tag_sub_dict in tag_sub_list:
            word_dict[tag_sub_dict['tag']] = scale(tag_sub_dict['events']) + adjustment

    return word_dict


def load_and_resize_mask(mask_path, width=None, height=None):
    """ Load a mask image and resize it according to given dimensions.

    The image is resized maintaining aspect ratio if only width or height is provided.

    Returns None if no mask_path.

    Parameters:
        mask_path (str): The path to the mask image file.
        width (int, optional): The desired width of the resized image. If only width is provided,
            the image is scaled to maintain its original aspect ratio. Defaults to None.
        height (int, optional): The desired height of the resized image. If only height is provided,
            the image is scaled to maintain its original aspect ratio. Defaults to None.

    Returns:
        numpy.ndarray or None: The mask as a uint8 array.  When a resize was requested the image is
            also converted to greyscale and thresholded to binary values (0 or 255); without a
            resize the pixel data is returned as loaded.
    """
    if not mask_path:
        return None

    # The context manager closes the underlying file once the pixel data has been read.
    with Image.open(mask_path) as mask_image:
        if not (width or height):
            return np.array(mask_image).astype(np.uint8)

        original_size = np.array((mask_image.width, mask_image.height))
        # Handle one missing param by scaling both sides with the same factor.
        if not height:
            scale = original_size[0] / width
            output_size = original_size / scale
        elif not width:
            scale = original_size[1] / height
            output_size = original_size / scale
        else:
            output_size = np.array((width, height))

        # Pillow expects a plain (width, height) tuple of ints.
        target_size = tuple(int(side) for side in output_size)
        resized = mask_image.resize(target_size, Image.LANCZOS)

    # Convert to greyscale then to binary black and white (0 or 255)
    greyscale = np.array(resized.convert('L'))
    return np.where(greyscale > 127, 255, 0).astype(np.uint8)
86 changes: 86 additions & 0 deletions hed/tools/visualization/word_cloud_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import random
from random import Random

import numpy as np
from PIL import Image, ImageFilter
from matplotlib import cm
from wordcloud import WordCloud


def _draw_contour(wc, img):
    """Draw the mask contour on an image, including images with an alpha channel.

    Slightly tweaked copy of the internal WordCloud function to allow transparency.

    Parameters:
        wc(WordCloud): The word cloud supplying mask, contour_width and contour_color.
        img(PIL.Image.Image): The rendered word cloud image.

    Returns:
        PIL.Image.Image: img unchanged when there is no mask, the contour width is 0 or there is
                         no contour color; otherwise a new image with the contour drawn on it.
    """
    if wc.mask is None or wc.contour_width == 0 or wc.contour_color is None:
        return img

    # NOTE: "_get_bolean_mask" (sic) is the name being called on the WordCloud object;
    # keep the spelling in sync with the wordcloud package.
    mask = wc._get_bolean_mask(wc.mask) * 255
    contour = Image.fromarray(mask.astype(np.uint8))
    contour = contour.resize(img.size)
    contour = contour.filter(ImageFilter.FIND_EDGES)
    contour = np.array(contour)

    # make sure borders are not drawn before changing width
    contour[[0, -1], :] = 0
    contour[:, [0, -1]] = 0

    # use gaussian to change width, divide by 10 to give more resolution
    radius = wc.contour_width / 10
    contour = Image.fromarray(contour)
    contour = contour.filter(ImageFilter.GaussianBlur(radius=radius))
    contour = np.array(contour) > 0

    # Replicate the boolean contour across every channel, including alpha for RGBA images.
    channels = 4 if img.mode == 'RGBA' else 3
    contour = np.dstack((contour,) * channels)

    # color the contour: blank out the contour pixels, then add the contour color there
    ret = np.array(img) * np.invert(contour)
    color = np.array(Image.new(img.mode, img.size, wc.contour_color))
    ret += color * contour

    return Image.fromarray(ret)

# Monkey-patch: replace WordCloud's contour-drawing method with _draw_contour so that
# contours can be drawn on images with transparency. The assignment is on the class,
# so it affects every WordCloud instance once this module has been imported.
WordCloud._draw_contour = _draw_contour


def random_color_darker(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None):
    """Random color generation func

    Only random_state is used; the other parameters are accepted and ignored.

    Parameters:
        word: Ignored.
        font_size: Ignored.
        position: Ignored.
        orientation: Ignored.
        font_path: Ignored.
        random_state(random.Random or None): Source of randomness.  A new Random() is used if None.

    Returns:
        str: A color string of the form "hsl(H, S%, L%)" with H in 0-255, S in 50-100 and L in 0-50.
    """
    if random_state is None:
        random_state = Random()

    # Draw in a fixed order (hue, saturation, lightness) so a seeded generator is reproducible.
    hue = random_state.randint(0, 255)
    saturation = random_state.randint(50, 100)
    lightness = random_state.randint(0, 50)
    return f"hsl({hue}, {saturation}%, {lightness}%)"


class ColormapColorFunc:
    """Word cloud color generator that steps through part of a matplotlib colormap."""

    def __init__(self, colormap='nipy_spectral', color_range=(0.0, 0.5), color_step_range=(0.15, 0.25)):
        """Initialize a word cloud color generator.

        Parameters:
            colormap (str, optional): The name of the matplotlib colormap to use for generating colors.
                                      Defaults to 'nipy_spectral'.
            color_range (tuple of float, optional): A tuple containing the minimum and maximum values to use
                                                    from the colormap. Defaults to (0.0, 0.5).
            color_step_range (tuple of float, optional): A tuple containing the minimum and maximum values to step
                                                         through the colormap. Defaults to (0.15, 0.25).
                                                         This is the speed at which it goes through the range chosen.
                                                         .25 means it will go through 1/4 of the range each pick.

        :raises ValueError:
            The colormap name is not known to matplotlib
        """
        self.colormap = self._resolve_colormap(colormap)
        self.color_range = color_range
        self.color_step_range = color_step_range
        self.current_fraction = random.uniform(0, 1)  # Start at a random point

    @staticmethod
    def _resolve_colormap(colormap):
        """Look up a colormap by name without relying on the deprecated cm.get_cmap where possible."""
        try:
            from matplotlib import colormaps
        except ImportError:
            # Older matplotlib without the colormap registry.
            return cm.get_cmap(colormap)

        if colormap is None:
            # cm.get_cmap(None) used matplotlib's configured default colormap.
            from matplotlib import rcParams
            colormap = rcParams['image.cmap']
        if not isinstance(colormap, str):
            # Presumably already a colormap object; use it as given.
            return colormap
        try:
            return colormaps[colormap]
        except KeyError as err:
            # Keep the exception type cm.get_cmap raised for unknown names.
            raise ValueError(f"{colormap!r} is not a known colormap name") from err

    def color_func(self, word, font_size, position, orientation, random_state=None, **kwargs):
        """Return the next color, advancing through the configured colormap range.

        All parameters, including random_state, are accepted and ignored;
        the step size is drawn from the module-level random generator.

        Returns:
            tuple of int: The (red, green, blue) color with each component in 0-255.
        """
        # Update the current color fraction and wrap around if necessary
        color_step = random.uniform(*self.color_step_range)
        self.current_fraction = (self.current_fraction + color_step) % 1.0

        # Scale the fraction to the desired range
        low, high = self.color_range[0], self.color_range[1]
        scaled_fraction = low + (self.current_fraction * (high - low))

        # Get the color from the colormap
        color = self.colormap(scaled_fraction)

        return tuple(int(c * 255) for c in color[:3])  # Convert to RGB format


# Module-level default color function: the bound method of a single ColormapColorFunc created
# at import time with its default arguments. The position in the colormap is therefore shared
# state that advances on every call.
default_color_func = ColormapColorFunc().color_func
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
defusedxml>=0.7.1
inflect>=6.0.2
inflect==6.0.2
numpy>=1.21.6
openpyxl>=3.1.0
pandas>=1.3.5
portalocker>=2.7.0
pydantic<2 # For compatibility with inflect
semantic_version>=2.10.0
wordcloud>=1.9.2
wordcloud==1.9.2
110 changes: 110 additions & 0 deletions tests/tools/visualization/test_tag_word_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import unittest
from wordcloud import WordCloud
from hed.tools.visualization import tag_word_cloud
from hed.tools.visualization.tag_word_cloud import load_and_resize_mask
import numpy as np
from PIL import Image, ImageDraw
import os


class TestWordCloudFunctions(unittest.TestCase):
    """Tests for converting tag summaries to word dicts and generating word clouds."""

    def test_convert_summary_to_word_dict(self):
        # A well-formed summary; with no transform and no adjustment the raw counts come back.
        main_tags = {
            'tag_category_1': [
                {'tag': 'tag1', 'events': 5},
                {'tag': 'tag2', 'events': 3},
            ],
            'tag_category_2': [
                {'tag': 'tag3', 'events': 7},
            ],
        }
        summary_json = {'Overall summary': {'Specifics': {'Main tags': main_tags}}}

        result = tag_word_cloud.summary_to_dict(summary_json, transform=None, adjustment=0)

        self.assertEqual(result, {'tag1': 5, 'tag2': 3, 'tag3': 7})

    def test_create_wordcloud(self):
        requested_width, requested_height = 400, 200
        frequencies = {'tag1': 5, 'tag2': 3, 'tag3': 7}

        cloud = tag_word_cloud.create_wordcloud(frequencies, width=requested_width, height=requested_height)

        self.assertIsInstance(cloud, WordCloud)
        self.assertEqual(cloud.width, requested_width)
        self.assertEqual(cloud.height, requested_height)

    def test_create_wordcloud_with_empty_dict(self):
        # An empty dictionary cannot produce a cloud and must raise.
        with self.assertRaises(ValueError):
            tag_word_cloud.create_wordcloud({})

    def test_create_wordcloud_with_single_word(self):
        # A single entry is enough to build a cloud, and that word must appear in it.
        cloud = tag_word_cloud.create_wordcloud({'single_word': 1})

        self.assertIsInstance(cloud, WordCloud)
        self.assertIn('single_word', cloud.words_)

class TestLoadAndResizeMask(unittest.TestCase):
    """Tests for load_and_resize_mask using a generated black image with a white circle."""

    @classmethod
    def setUpClass(cls):
        import tempfile

        # Create a simple black and white image
        cls.original_size = (300, 200)
        cls.img = Image.new('L', cls.original_size, 0)  # Start with a black image

        # Draw a white circle in the middle of the image.
        # The radius is a quarter of the smaller dimension; the center is the image center.
        d = ImageDraw.Draw(cls.img)
        circle_radius = min(cls.original_size) // 4
        circle_center = (cls.original_size[0] // 2, cls.original_size[1] // 2)
        d.ellipse((circle_center[0] - circle_radius,
                   circle_center[1] - circle_radius,
                   circle_center[0] + circle_radius,
                   circle_center[1] + circle_radius),
                  fill=255)  # Fill the ellipse with white

        # Write the fixture to a unique temp file rather than the current working directory,
        # so the tests do not depend on (or pollute) wherever they are launched from.
        file_descriptor, cls.img_path = tempfile.mkstemp(suffix='.bmp')
        os.close(file_descriptor)
        cls.img.save(cls.img_path)

    @classmethod
    def tearDownClass(cls):
        # Clean up the temp image
        os.remove(cls.img_path)

    def test_no_resizing(self):
        mask = load_and_resize_mask(self.img_path)
        mask_img = Image.fromarray(mask)
        self.assertEqual((mask_img.width, mask_img.height), self.original_size)

    def test_width_resizing(self):
        width = 150
        mask = load_and_resize_mask(self.img_path, width=width)
        mask_img = Image.fromarray(mask)
        expected_width, expected_height = width, int(self.original_size[1] * width / self.original_size[0])
        self.assertEqual((mask_img.width, mask_img.height), (expected_width, expected_height))

    def test_height_resizing(self):
        height = 100
        mask = load_and_resize_mask(self.img_path, height=height)
        mask_img = Image.fromarray(mask)
        expected_shape = (int(self.original_size[0] * height / self.original_size[1]), height)
        self.assertEqual((mask_img.width, mask_img.height), expected_shape)

    def test_both_dimensions_resizing(self):
        width, height = 100, 75
        mask = load_and_resize_mask(self.img_path, width=width, height=height)
        self.assertEqual(mask.shape, (height, width))

    def test_mask_color(self):
        mask = load_and_resize_mask(self.img_path)
        # The fixture is an 'L' mode image drawn only with 0 and 255, so those are the only values
        unique_values = np.unique(mask)
        self.assertCountEqual(unique_values, [0, 255])
Loading