From b8a5fbebb3e889d082823f5a5bd50db403c5ca13 Mon Sep 17 00:00:00 2001 From: IanCa Date: Thu, 7 Sep 2023 18:52:40 -0500 Subject: [PATCH 1/2] Add svg support for wordcloud --- hed/tools/visualization/tag_word_cloud.py | 25 ++++++++++- hed/tools/visualization/word_cloud_util.py | 41 ++++++++++++++++-- tests/data/visualization/word_mask.png | Bin 0 -> 4717 bytes tests/models/test_definition_dict.py | 10 +++++ .../visualization/test_tag_word_cloud.py | 40 +++++++++++++++++ 5 files changed, 111 insertions(+), 5 deletions(-) create mode 100644 tests/data/visualization/word_mask.png diff --git a/hed/tools/visualization/tag_word_cloud.py b/hed/tools/visualization/tag_word_cloud.py index 68a3a257d..9f9092cba 100644 --- a/hed/tools/visualization/tag_word_cloud.py +++ b/hed/tools/visualization/tag_word_cloud.py @@ -1,9 +1,9 @@ import numpy as np from PIL import Image -from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud +from hed.tools.visualization.word_cloud_util import default_color_func, WordCloud, generate_contour_svg -def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=200, **kwargs): +def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400, height=None, **kwargs): """Takes a word dict and returns a generated word cloud object Parameters: @@ -25,6 +25,13 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 mask_image = load_and_resize_mask(mask_path, width, height) width = mask_image.shape[1] height = mask_image.shape[0] + if height is None: + if width is None: + width = 400 + height = width // 2 + if width is None: + width = height * 2 + kwargs.setdefault('contour_width', 3) kwargs.setdefault('contour_color', 'black') kwargs.setdefault('prefer_horizontal', 0.75) @@ -41,6 +48,20 @@ def create_wordcloud(word_dict, mask_path=None, background_color=None, width=400 return wc +def word_cloud_to_svg(wc): + """Takes word cloud and returns it as an SVG string. + + Parameters: + wc(WordCloud): the word cloud object + Returns: + svg_string(str): The svg for the word cloud + """ + svg_string = wc.to_svg() + svg_string = svg_string.replace("fill:", "fill:rgb") + svg_string = svg_string.replace("", generate_contour_svg(wc, wc.width, wc.height) + "") + return svg_string + + def summary_to_dict(summary, transform=np.log10, adjustment=5): """Converts a HedTagSummary json dict into the word cloud input format diff --git a/hed/tools/visualization/word_cloud_util.py b/hed/tools/visualization/word_cloud_util.py index 6071a138a..490be199f 100644 --- a/hed/tools/visualization/word_cloud_util.py +++ b/hed/tools/visualization/word_cloud_util.py @@ -7,14 +7,31 @@ from wordcloud import WordCloud -def _draw_contour(wc, img): +def generate_contour_svg(wc, width, height): + """Generates an SVG contour mask based on a word cloud object and dimensions. + + Parameters: + wc (WordCloud): The word cloud object. + width (int): SVG image width in pixels. + height (int): SVG image height in pixels. + + Returns: + str: SVG point list for the contour mask, or empty string if not generated. + """ + contour = _get_contour_mask(wc, width, height) + if contour is None: + return "" + return _numpy_to_svg(contour) + + +def _get_contour_mask(wc, width, height): """Slightly tweaked copy of internal WorldCloud function to allow transparency""" if wc.mask is None or wc.contour_width == 0 or wc.contour_color is None: - return img + return None mask = wc._get_bolean_mask(wc.mask) * 255 contour = Image.fromarray(mask.astype(np.uint8)) - contour = contour.resize(img.size) + contour = contour.resize((width, height)) contour = contour.filter(ImageFilter.FIND_EDGES) contour = np.array(contour) @@ -22,6 +39,15 @@ def _draw_contour(wc, img): contour[[0, -1], :] = 0 contour[:, [0, -1]] = 0 + return contour + + +def _draw_contour(wc, img): + """Slightly tweaked copy of internal WorldCloud function to allow transparency""" + contour = _get_contour_mask(wc, img.width, img.height) + if contour is None: + return img + # use gaussian to change width, divide by 10 to give more resolution radius = wc.contour_width / 10 contour = Image.fromarray(contour) @@ -44,6 +70,15 @@ def _draw_contour(wc, img): WordCloud._draw_contour = _draw_contour +def _numpy_to_svg(contour): + svg_elements = [] + points = np.array(contour.nonzero()).T + for y, x in points: + svg_elements.append(f'') + + return '\n'.join(svg_elements) + + def random_color_darker(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None): """Random color generation func""" if random_state is None: diff --git a/tests/data/visualization/word_mask.png b/tests/data/visualization/word_mask.png new file mode 100644 index 0000000000000000000000000000000000000000..e235d063ea190f92c537137ad9eb3a87b3faf3e4 GIT binary patch literal 4717 zcmcIodpOkF*I#2WGf87iLS-7`)^YFRRwiSNTZokCRGQp7IU#jolrv?75-N2_$>=D_ z5l1048l@=7sH4cGDP1O%OViMi{PxK2yyuViulIRg&*Pc>U3-7mT6^u!TI=)OX+CRQ zRcFng1pun`EOspvtO!6Lq0HnH7fWzv@ zgfoE-+(I4&7;BkCC|JzLflsyw3yW7vFfiBP|9#o5uv@_1lG)hTJk>k4|FH?C*j<9T zxG%GkG_@0hk3K5G=J#vw4&M@Q_DYQ*!JK0qzHx&yS&et2lKHsoSvPwh)%o96%W;>~ z-&PF#wY4{R1eZ>eve(k0D8#dH9#0QQ7anPDZGk@sbgj}o5jsaP2Yzd93^yF-UZ&j8 zP&`fcyBc;$xy-UH=}5yuz1)u&e>~mRUA*K;--xYFf?3(d0|Yy}+GK2lj(xbwfeufp zSSbcK6z{7QMCJ@16($X=df`w=e2X4Zgh^GN6YN#|#>Wjh>3#RNX?aU3^bg=Z+4d&K z@F>k{d(|=}Np(ER8-jgPeVh4JBk5ufvE!P9#OEYVw%%}oay@R0K!2bX=W{}wG{B== zN?RF_g0m6doI*jWc{LK3Y9Ej@u_$9no1Uu5%HdqSEpgrwQoazI_{vQ)kj&v3409_z zzlcrvuC@xq8js=nC}yS)iy}H6yk&!t6z?d;2y#a&X#B?Y3Pdfh;Rh_CpSez}q**~xQL2Pfu-K6d@ZkTY9vlpE>!che(Qrmkr=Bdo5~bptcA zOp)SqRH8?At=^61`%7b4;f1c6^BmY3hWl7yg|4wQV!+AInGrmS*T&b}_ITgl+<26h zfDZVkzK=zfM%)fk9zOH=ST2uJy|Doj!)hB%^K0LztIQL|FWPbOVyyOER_3~|m*$FB z4JoX3rfjZSOXa&I@@7O?OVW=rE&n^NW?5?mutwL+Do@ zPT5nY#!)G^g}=7Z-_7>!nHNHW;twNnakSr@c!9bKM7B=i!DPLKJ@a%&;c3X^J0nBR z{pLog5F5d6V2{yg+7tp^|vzghDG`z*3}uLxt<`S&_%>78M~$rm~96A=GTg z*xsqD$MH4Kt# z?#;krJN%d=TF~UB#L$-&{mtv8?lc7Ua=s0>2}?CjSpPHg$}$@@XzRS(ug=lA9s^Cc z^ZibU1{zec@X%1YLMiML2dXiDZxY&5Nn_u(b0#Q@!6Hr2&FghG-v=AEwKjVoNtE4#7I(=O zF%!r3lL(1r8G*0+>;;Z5W}OtS9Cak*>J_(!-~8g1?~3_*N}p za`N<~LRd@;2HJCxdr!1(O0R`Tt``G!&crT<=H&b%1b9{9Fjlc%ay=EeCr!y58WDmj z{i~~4lIzhx#U=c*>5WsX5*A8Jc1zOip{em(&zeqA>@i>j%0{eLAId+1g@OpA{EK#X zw}1ao?H)&fuc3uF;Dx5fX38sciKpa;L@2&b`E^S!dPZrFCc{hidZmdzy~M&-lMk(z zQ&$4_T+tc13GSHhAqtdJlJY-IpUr!wcMDcnyIW{?iP@T>0@0gG{L1=# zzq0=%&IPeOS>7g?-crv|fU;$6Sa`f-_6ir|-&_rAY4!>b(siA@Ut^$Kb1CW>HQv)R z=F){-3UHNVEJfg8tlY+wuehpVh*mtZ)aO{RBwq@EYqGqdm5E#u`UMSLd(VG&2Lm4; zlUu!r$mG0+-dh(A|D>LanHDD0u_gT1Bqh=N~WFzDK;c}z^%8^uCeiMDeA@8v8lZP9`Zn(+7MrYw8P=~6i!AI2JTtP zh5nz%H^^gPWguPnLII@V=;X@V?dvrDtInZh#@;&Sl^@NpuxEY$-02`6N(n4ltqebv zJT`H1y7C}T8L|$X-vnGE3pFN-1LP#3h`oZf(Vx`Ve{N&kFm0m8l7H}0Y6OgH#wpQ=mw(+Jum2{j1>uHI6s3>2qn3LyU7 zbb9a8ETD#ZXZ`e1%87va;Z^PbL}LNX`=4-qXASGCMO|ZpXSe19HM{u56S)`PNd$61 z9)gF#C4lVDTs_jiAxAVpl>t1*Ya)eL z{fgE{dA`J%`9zBq6ZPgby;YUaK9WOCpw8CJXLyQHNY79Ma6?8 zKeyyL)1qg|a|Az=69D{^5k{fRoiy$JyCVE|ad;8hAv|)|m=-q{&H65OEmD~o{672p zVCZ+%?*j*S9i;zQ3uSc?QLz8T#an3rBs3(V9g8n7r-YFx6m}I_Ksa zycX;lO}c;PZOS~>HNGo(t$759E&gStEnRNK8I^?PbGHxcN+XOP-&KXnawA7s3*zwK zS$K={#FN=jD$m^IYB3^Rsf70G?i{#0jXrtWM`5`#KmY^ zzk<(M(fO4XIEG)_n7oKbJ4&0ntoe;p?VZYCYusFCDAh7-_QHUl+!%SOsyUD16k2Ns z;v10@Iild2NE-q~ZhAE7Az9*lFHZ^nByl|TWY9z#$TyJY7!Mr93Rf&0@?&PcU;2Up zf1TQ&sm|eLO}Z!)BD~E&i)SJ*itg0w&$BCm8YG^#T$m=i+EDP3m@WRFwc` z1iJ68Y!!w|`~4m^rMX>_H6a3eEwDFUGFPbVjtlC_VFZ7kqf=~Zt$ zef82FtT?w_rp$NUez`{^$H;m6Cv>R#xy9kD3jLJsI0PIO1=}4Yvgw!0CYYI_e{h~) zOT*rL4wO_Ze?sRz->|hWnG$)BSU?;2yGq{lsmEK7-0mC68DITW78xUphcjaxZ_K~C zvD0Hhei+>duJ`j^pihLOnRl5XS3^n4oQgVm(>5a14g6?o)BCH~=+C#PPiqn~#yDmc zy$ND%_fM#oMYoM9g?=6{7b+~UCcGlKx4J0|f+Xm2i&kFJVW6c%l*+zLP)mLNt`pqppqU$awk z7qxFgQk)dAr} zv)QYaQ4t1ZQi3pI=A13FE>SFx%b5eJP_xmYqfc!Zr#*@KhH*gF$vOQPy_rfT)W5(% zUm-*`tpX=SOnv&c;Y4mfMt~N4dugHc^s zc66o)bJ;Yn&)jZ=PB};6{2T?CO+$M4uw^TMTZ2=V$JXnHAJ>{5m=@gb_I7 zxY}mkB}x{;(6s0JmI)>A5;*c_x1e7JF<)kUX3#EhgxWK&WTjuCM5CJ!v6^=;a}|?F z)m=vy<(=2Og-i{$oAWGVgqL`Qn(7pg>W8)K?)*{uaRKp(qvqOD#j8)kT*-N zqoCmFTEkJQ5g+H1ZHq-S+sqQfy)Bu@O*Y0q$@{hAPWgQZ-1~yBZtNS8ox{ijprRgI iu8jEZ|KYA?3Y(5OowLqx@ojW}1n$mjSY=H1!T$jA_mTYo literal 0 HcmV?d00001 diff --git a/tests/models/test_definition_dict.py b/tests/models/test_definition_dict.py index 357584cc1..5005f55c5 100644 --- a/tests/models/test_definition_dict.py +++ b/tests/models/test_definition_dict.py @@ -134,5 +134,15 @@ def test_expand_defs(self): hed_string.expand_defs() self.assertEqual(str(hed_string), expected_results[key]) + def test_altering_definition_contents(self): + def_dict = DefinitionDict("(Definition/DefName, (Event, Action))", self.hed_schema) + hed_string1 = HedString("Def/DefName", self.hed_schema, def_dict) + hed_string2 = HedString("Def/DefName", self.hed_schema, def_dict) + hed_string1.expand_defs() + hed_string2.expand_defs() + hed_string1.remove([hed_string1.get_all_tags()[2]]) + + self.assertNotEqual(hed_string1, hed_string2) + if __name__ == '__main__': unittest.main() diff --git a/tests/tools/visualization/test_tag_word_cloud.py b/tests/tools/visualization/test_tag_word_cloud.py index 2b515c941..6bb940eec 100644 --- a/tests/tools/visualization/test_tag_word_cloud.py +++ b/tests/tools/visualization/test_tag_word_cloud.py @@ -2,12 +2,19 @@ from wordcloud import WordCloud from hed.tools.visualization import tag_word_cloud from hed.tools.visualization.tag_word_cloud import load_and_resize_mask +from hed.tools.visualization.word_cloud_util import generate_contour_svg + import numpy as np from PIL import Image, ImageDraw import os class TestWordCloudFunctions(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.mask_path = os.path.realpath(os.path.join(os.path.dirname(__file__), + '../../data/visualization/word_mask.png')) + def test_convert_summary_to_word_dict(self): # Assume we have a valid summary_json summary_json = { @@ -40,6 +47,30 @@ def test_create_wordcloud(self): self.assertEqual(wc.width, width) self.assertEqual(wc.height, height) + def test_create_wordcloud_default_params(self): + word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} + wc = tag_word_cloud.create_wordcloud(word_dict) + + self.assertIsInstance(wc, WordCloud) + self.assertEqual(wc.width, 400) + self.assertEqual(wc.height, 200) + + def test_mask_scaling(self): + word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} + wc = tag_word_cloud.create_wordcloud(word_dict, self.mask_path, width=300, height=300) + + self.assertIsInstance(wc, WordCloud) + self.assertEqual(wc.width, 300) + self.assertEqual(wc.height, 300) + + def test_mask_scaling2(self): + word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} + wc = tag_word_cloud.create_wordcloud(word_dict, self.mask_path, width=300, height=None) + + self.assertIsInstance(wc, WordCloud) + self.assertEqual(wc.width, 300) + self.assertLess(wc.height, 300) + def test_create_wordcloud_with_empty_dict(self): # Test creation of word cloud with an empty dictionary word_dict = {} @@ -54,6 +85,15 @@ def test_create_wordcloud_with_single_word(self): # Check that the single word is in the word cloud self.assertIn('single_word', wc.words_) + def test_valid_word_cloud(self): + word_dict = {'tag1': 5, 'tag2': 3, 'tag3': 7} + wc = tag_word_cloud.create_wordcloud(word_dict, mask_path=self.mask_path, width=400, height=None) + svg_output = tag_word_cloud.word_cloud_to_svg(wc) + self.assertTrue(svg_output.startswith('