From 2f130bc055c892480d99782fad8e3beaf27bbc16 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Mon, 23 Dec 2019 16:17:06 +0100 Subject: [PATCH 01/12] Add to_svg prototype, using freetype-py to get metrics --- wordcloud/wordcloud.py | 127 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 83b5590d4..3fc004764 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -728,6 +728,133 @@ def __array__(self): def to_html(self): raise NotImplementedError("FIXME!!!") + + def to_svg(self): + """Export to SVG. + + Returns + ------- + content : string + Word cloud image as SVG string + """ + + # Make sure layout is generated + self._check_generated() + + # Get output size, in pixels + if self.mask is not None: + width = self.mask.shape[1] + height = self.mask.shape[0] + else: + height, width = self.height, self.width + + # Import locally, to make this dependency optional + # TODO try to use Pillow's bindings instead, to avoid additional dependency + import freetype + + # Use FreeType to analyze font + face = freetype.Face(self.font_path) + + # Compute text bounding box + # TODO maybe promote this as a full-fledged internal method + def compute_box(text): + previous_char = 0 + pen_x, pen_y = 0, 0 + min_x, min_y = 0, 0 + max_x, max_y = 0, 0 + for char in text: + face.load_char(char, freetype.FT_LOAD_RENDER) + kerning = face.get_kerning(previous_char, char) + previous_char = char + + # Get raw bitmap + width = face.glyph.bitmap.width + rows = face.glyph.bitmap.rows + top = face.glyph.bitmap_top + left = face.glyph.bitmap_left + + # Apply character bounding box + pen_x += kerning.x + x0 = (pen_x >> 6) + left + x1 = x0 + width + y0 = (pen_y >> 6) - (rows - top) + y1 = y0 + rows + + # Update global bounding box + min_x, max_x = min(min_x, x0), max(max_x, x1) + min_y, max_y = min(min_y, y0), max(max_y, y1) + + # Move pointer + pen_x += face.glyph.advance.x + pen_y += 
face.glyph.advance.y + + return min_x, min_y, max_x, max_y + + # Text buffer + result = [] + + # Prepare global style + style = {} + # TODO properly escape/quote this + # TODO should add option to specify URL for font (i.e. WOFF file) + # TODO should maybe add option to embed font in SVG file + style['font-family'] = repr(face.family_name.decode('utf-8')) + if face.style_flags & freetype.FT_STYLE_FLAG_BOLD: + style['font-weight'] = 'bold' + if face.style_flags & freetype.FT_STYLE_FLAG_ITALIC: + style['font-weight'] = 'italic' + style = ';'.join(':'.join(pair) for pair in style.items()) + + # Add header + result.append( + '' + .format(width, height, style) + ) + + # Add background + if self.background_color is not None: + result.append( + '' + '' + .format(self.background_color) + ) + + # For each word in layout + for (word, count), font_size, (y, x), orientation, color in self.layout_: + + # Compute text bounding box + face.set_char_size(font_size * 64) + min_x, min_y, max_x, max_y = compute_box(word) + + # Compute text attributes + attributes = {} + if orientation == Image.ROTATE_90: + x += max_y + y += max_x - min_x + attributes['transform'] = 'translate({},{}) rotate(-90)'.format(x, y) + else: + x += min_x + y += max_y + attributes['transform'] = 'translate({},{})'.format(x, y) + attributes['font-size'] = '{}'.format(font_size) + attributes['style'] = 'fill:{}'.format(color) + + # Create node + attributes = ' '.join('{}="{}"'.format(k, v) for k, v in attributes.items()) + result.append('{}'.format(attributes, word)) + + # Complete SVG file + result.append('') + return '\n'.join(result) def _get_bolean_mask(self, mask): """Cast to two dimensional boolean mask.""" From 2030ff90659a6e21e26c932f9afacc3c613f4e51 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Mon, 23 Dec 2019 22:54:59 +0100 Subject: [PATCH 02/12] Use PIL bindings instead of freetype-py --- wordcloud/wordcloud.py | 73 +++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 48 
deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 3fc004764..d15e5210b 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -742,67 +742,37 @@ def to_svg(self): self._check_generated() # Get output size, in pixels + # TODO check self.scale if self.mask is not None: width = self.mask.shape[1] height = self.mask.shape[0] else: height, width = self.height, self.width - - # Import locally, to make this dependency optional - # TODO try to use Pillow's bindings instead, to avoid additional dependency - import freetype - - # Use FreeType to analyze font - face = freetype.Face(self.font_path) - - # Compute text bounding box - # TODO maybe promote this as a full-fledged internal method - def compute_box(text): - previous_char = 0 - pen_x, pen_y = 0, 0 - min_x, min_y = 0, 0 - max_x, max_y = 0, 0 - for char in text: - face.load_char(char, freetype.FT_LOAD_RENDER) - kerning = face.get_kerning(previous_char, char) - previous_char = char - - # Get raw bitmap - width = face.glyph.bitmap.width - rows = face.glyph.bitmap.rows - top = face.glyph.bitmap_top - left = face.glyph.bitmap_left - - # Apply character bounding box - pen_x += kerning.x - x0 = (pen_x >> 6) + left - x1 = x0 + width - y0 = (pen_y >> 6) - (rows - top) - y1 = y0 + rows - - # Update global bounding box - min_x, max_x = min(min_x, x0), max(max_x, x1) - min_y, max_y = min(min_y, y0), max(max_y, y1) - - # Move pointer - pen_x += face.glyph.advance.x - pen_y += face.glyph.advance.y - - return min_x, min_y, max_x, max_y + + # Get max font size + if self.max_font_size is None: + max_font_size = max(w[1] for w in self.layout_) + else: + max_font_size = self.max_font_size # Text buffer result = [] # Prepare global style style = {} + font = ImageFont.truetype(self.font_path, int(max_font_size)) + font_family, font_style = font.getname() # TODO properly escape/quote this # TODO should add option to specify URL for font (i.e. 
WOFF file) # TODO should maybe add option to embed font in SVG file - style['font-family'] = repr(face.family_name.decode('utf-8')) - if face.style_flags & freetype.FT_STYLE_FLAG_BOLD: + style['font-family'] = repr(font_family) + font_style = font_style.lower() + if 'bold' in font_style: style['font-weight'] = 'bold' - if face.style_flags & freetype.FT_STYLE_FLAG_ITALIC: - style['font-weight'] = 'italic' + if 'italic' in font_style: + style['font-style'] = 'italic' + elif 'oblique' in font_style: + style['font-style'] = 'oblique' style = ';'.join(':'.join(pair) for pair in style.items()) # Add header @@ -831,9 +801,16 @@ def compute_box(text): # For each word in layout for (word, count), font_size, (y, x), orientation, color in self.layout_: + # Get text metrics + font = ImageFont.truetype(self.font_path, int(font_size)) + (size_x, size_y), (offset_x, offset_y) = font.font.getsize(word) + ascent, descent = font.getmetrics() + # Compute text bounding box - face.set_char_size(font_size * 64) - min_x, min_y, max_x, max_y = compute_box(word) + min_x = offset_x + max_x = size_x - offset_x + min_y = ascent - size_y + max_y = ascent - offset_y # Compute text attributes attributes = {} From ad12167f6157d75018229cbdaa978a5dbc5cf5aa Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Mon, 23 Dec 2019 23:02:02 +0100 Subject: [PATCH 03/12] Handle scale attribute --- wordcloud/wordcloud.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index d15e5210b..0067f5564 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -742,13 +742,12 @@ def to_svg(self): self._check_generated() # Get output size, in pixels - # TODO check self.scale if self.mask is not None: width = self.mask.shape[1] height = self.mask.shape[0] else: height, width = self.height, self.width - + # Get max font size if self.max_font_size is None: max_font_size = max(w[1] for w in self.layout_) @@ -760,7 +759,7 @@ def 
to_svg(self): # Prepare global style style = {} - font = ImageFont.truetype(self.font_path, int(max_font_size)) + font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale)) font_family, font_style = font.getname() # TODO properly escape/quote this # TODO should add option to specify URL for font (i.e. WOFF file) @@ -783,7 +782,7 @@ def to_svg(self): ' height="{}"' ' style="{}"' '>' - .format(width, height, style) + .format(width * self.scale, height * self.scale, style) ) # Add background @@ -800,12 +799,14 @@ def to_svg(self): # For each word in layout for (word, count), font_size, (y, x), orientation, color in self.layout_: + x *= self.scale + y *= self.scale # Get text metrics - font = ImageFont.truetype(self.font_path, int(font_size)) + font = ImageFont.truetype(self.font_path, int(font_size * self.scale)) (size_x, size_y), (offset_x, offset_y) = font.font.getsize(word) ascent, descent = font.getmetrics() - + # Compute text bounding box min_x = offset_x max_x = size_x - offset_x @@ -822,7 +823,7 @@ def to_svg(self): x += min_x y += max_y attributes['transform'] = 'translate({},{})'.format(x, y) - attributes['font-size'] = '{}'.format(font_size) + attributes['font-size'] = '{}'.format(font_size * self.scale) attributes['style'] = 'fill:{}'.format(color) # Create node From 28ac67c5f5a3af54f41479ad36371e0301f09518 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Tue, 24 Dec 2019 20:30:14 +0100 Subject: [PATCH 04/12] Small fix in offset --- wordcloud/wordcloud.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 0067f5564..7842e7450 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -761,9 +761,10 @@ def to_svg(self): style = {} font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale)) font_family, font_style = font.getname() - # TODO properly escape/quote this + # TODO properly escape/quote this name # TODO should add option to 
specify URL for font (i.e. WOFF file) # TODO should maybe add option to embed font in SVG file + # TODO when embedding, we should try to embed only a subset style['font-family'] = repr(font_family) font_style = font_style.lower() if 'bold' in font_style: @@ -801,14 +802,15 @@ def to_svg(self): for (word, count), font_size, (y, x), orientation, color in self.layout_: x *= self.scale y *= self.scale - + # Get text metrics font = ImageFont.truetype(self.font_path, int(font_size * self.scale)) (size_x, size_y), (offset_x, offset_y) = font.font.getsize(word) ascent, descent = font.getmetrics() # Compute text bounding box - min_x = offset_x + # TODO some browser do not render glyphs the same way (e.g. Segoe Script in Chrome is different, while in Internet Explorer it matches to_image) + min_x = -offset_x max_x = size_x - offset_x min_y = ascent - size_y max_y = ascent - offset_y @@ -830,6 +832,8 @@ def to_svg(self): attributes = ' '.join('{}="{}"'.format(k, v) for k, v in attributes.items()) result.append('{}'.format(attributes, word)) + # TODO draw contour + # Complete SVG file result.append('') return '\n'.join(result) From a71825edacca7795b6b27d801ed671474107ee8a Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Tue, 24 Dec 2019 20:44:34 +0100 Subject: [PATCH 05/12] Add option to embed image in SVG, useful for debug --- wordcloud/wordcloud.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 7842e7450..11df1af1e 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -9,8 +9,10 @@ import warnings from random import Random +import io import os import re +import base64 import sys import colorsys import matplotlib @@ -729,7 +731,7 @@ def __array__(self): def to_html(self): raise NotImplementedError("FIXME!!!") - def to_svg(self): + def to_svg(self, embed_image=False): """Export to SVG. 
Returns @@ -797,6 +799,21 @@ def to_svg(self): '' .format(self.background_color) ) + + # Embed image, useful for debug purpose + if embed_image: + image = self.to_image() + data = io.BytesIO() + image.save(data, format='JPEG') + data = base64.b64encode(data.getbuffer()).decode('ascii') + result.append( + '' + .format(data) + ) # For each word in layout for (word, count), font_size, (y, x), orientation, color in self.layout_: From 49af1e9dca932e63c2d16dee64847dcc7485b554 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Sun, 29 Dec 2019 22:32:43 +0100 Subject: [PATCH 06/12] Fix documentation and code style --- wordcloud/wordcloud.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 11df1af1e..650963594 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -730,10 +730,24 @@ def __array__(self): def to_html(self): raise NotImplementedError("FIXME!!!") - + def to_svg(self, embed_image=False): """Export to SVG. + Font is assumed to be available to the SVG reader. Otherwise, text + coordinates may produce artifacts when rendered with replacement font. + + Note that some renderers do not handle glyphs the same way, and may + differ from `to_image` result. In particular, handwriting-like fonts + (e.g. Segoe Script) ligatures might not be properly rendered, which + could introduce discrepancies in tight layouts. + + Parameters + ---------- + embed_image : bool, default=False + Whether to include rasterized image inside resulting SVG file. + Useful for debugging. 
+ Returns ------- content : string @@ -799,7 +813,7 @@ def to_svg(self, embed_image=False): '' .format(self.background_color) ) - + # Embed image, useful for debug purpose if embed_image: image = self.to_image() @@ -819,17 +833,15 @@ def to_svg(self, embed_image=False): for (word, count), font_size, (y, x), orientation, color in self.layout_: x *= self.scale y *= self.scale - + # Get text metrics font = ImageFont.truetype(self.font_path, int(font_size * self.scale)) (size_x, size_y), (offset_x, offset_y) = font.font.getsize(word) ascent, descent = font.getmetrics() # Compute text bounding box - # TODO some browser do not render glyphs the same way (e.g. Segoe Script in Chrome is different, while in Internet Explorer it matches to_image) min_x = -offset_x max_x = size_x - offset_x - min_y = ascent - size_y max_y = ascent - offset_y # Compute text attributes From 061ee0671dabb0b19f99f3e336d8b5c52dcf6b17 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Sun, 29 Dec 2019 23:15:20 +0100 Subject: [PATCH 07/12] Properly escape words in XML --- wordcloud/wordcloud.py | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 650963594..595efe100 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -18,6 +18,7 @@ import matplotlib import numpy as np from operator import itemgetter +from xml.sax import saxutils from PIL import Image from PIL import ImageColor @@ -754,6 +755,10 @@ def to_svg(self, embed_image=False): Word cloud image as SVG string """ + # TODO should add option to specify URL for font (i.e. 
WOFF file) + # TODO should maybe add option to embed font in SVG file + # TODO when embedding, we should try to embed only a subset + # Make sure layout is generated self._check_generated() @@ -778,9 +783,6 @@ def to_svg(self, embed_image=False): font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale)) font_family, font_style = font.getname() # TODO properly escape/quote this name - # TODO should add option to specify URL for font (i.e. WOFF file) - # TODO should maybe add option to embed font in SVG file - # TODO when embedding, we should try to embed only a subset style['font-family'] = repr(font_family) font_style = font_style.lower() if 'bold' in font_style: @@ -797,9 +799,13 @@ def to_svg(self, embed_image=False): ' xmlns="http://www.w3.org/2000/svg"' ' width="{}"' ' height="{}"' - ' style="{}"' + ' style={}' '>' - .format(width * self.scale, height * self.scale, style) + .format( + width * self.scale, + height * self.scale, + saxutils.quoteattr(style) + ) ) # Add background @@ -849,17 +855,29 @@ def to_svg(self, embed_image=False): if orientation == Image.ROTATE_90: x += max_y y += max_x - min_x - attributes['transform'] = 'translate({},{}) rotate(-90)'.format(x, y) + transform = 'translate({},{}) rotate(-90)'.format(x, y) else: x += min_x y += max_y - attributes['transform'] = 'translate({},{})'.format(x, y) - attributes['font-size'] = '{}'.format(font_size * self.scale) - attributes['style'] = 'fill:{}'.format(color) + transform = 'translate({},{})'.format(x, y) # Create node attributes = ' '.join('{}="{}"'.format(k, v) for k, v in attributes.items()) - result.append('{}'.format(attributes, word)) + result.append( + '' + '{}' + '' + .format( + transform, + font_size * self.scale, + color, + saxutils.escape(word) + ) + ) # TODO draw contour From 4ccece5643910f66571921e6a3129c409dd96be2 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Fri, 3 Jan 2020 13:42:18 +0100 Subject: [PATCH 08/12] Add embed_font option for to_svg --- 
wordcloud/wordcloud.py | 96 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 79 insertions(+), 17 deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 595efe100..1af45e11b 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -732,11 +732,13 @@ def __array__(self): def to_html(self): raise NotImplementedError("FIXME!!!") - def to_svg(self, embed_image=False): + def to_svg(self, embed_image=False, embed_font=False): """Export to SVG. Font is assumed to be available to the SVG reader. Otherwise, text coordinates may produce artifacts when rendered with replacement font. + It is also possible to include the original font in WOFF format using + `embed_font`. Note that some renderers do not handle glyphs the same way, and may differ from `to_image` result. In particular, handwriting-like fonts @@ -749,6 +751,9 @@ def to_svg(self, embed_image=False): Whether to include rasterized image inside resulting SVG file. Useful for debugging. + embed_font : bool, default=False + Whether to include font inside resulting SVG file. + Returns ------- content : string @@ -756,7 +761,6 @@ def to_svg(self, embed_image=False): """ # TODO should add option to specify URL for font (i.e. 
WOFF file) - # TODO should maybe add option to embed font in SVG file # TODO when embedding, we should try to embed only a subset # Make sure layout is generated @@ -778,20 +782,22 @@ def to_svg(self, embed_image=False): # Text buffer result = [] - # Prepare global style - style = {} + # Get font information font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale)) - font_family, font_style = font.getname() + raw_font_family, raw_font_style = font.getname() # TODO properly escape/quote this name - style['font-family'] = repr(font_family) - font_style = font_style.lower() - if 'bold' in font_style: - style['font-weight'] = 'bold' - if 'italic' in font_style: - style['font-style'] = 'italic' - elif 'oblique' in font_style: - style['font-style'] = 'oblique' - style = ';'.join(':'.join(pair) for pair in style.items()) + font_family = repr(raw_font_family) + raw_font_style = raw_font_style.lower() + if 'bold' in raw_font_style: + font_weight = 'bold' + else: + font_weight = 'normal' + if 'italic' in raw_font_style: + font_style = 'italic' + elif 'oblique' in raw_font_style: + font_style = 'oblique' + else: + font_style = 'normal' # Add header result.append( @@ -799,12 +805,68 @@ def to_svg(self, embed_image=False): ' xmlns="http://www.w3.org/2000/svg"' ' width="{}"' ' height="{}"' - ' style={}' '>' .format( width * self.scale, - height * self.scale, - saxutils.quoteattr(style) + height * self.scale + ) + ) + + # Embed font, if requested + if embed_font: + + # Import here, to avoid hard dependency on fonttools + import fontTools + import fontTools.subset as subset + + # Load and subset font + options = subset.Options() + ttf = subset.load_font(self.font_path, options) + # TODO do subset + + # Export as WOFF + # TODO is there a better method, i.e. directly export to WOFF? 
+ buffer = io.BytesIO() + ttf.saveXML(buffer) + buffer.seek(0) + woff = fontTools.ttLib.TTFont(flavor='woff') + woff.importXML(buffer) + + # Create stylesheet with embedded font face + buffer = io.BytesIO() + woff.save(buffer) + data = base64.b64encode(buffer.getbuffer()).decode('ascii') + url = 'data:application/font-woff;charset=utf-8;base64,' + data + result.append( + '' + .format( + font_family, + font_weight, + font_style, + url + ) + ) + + # Select global style + result.append( + '' + .format( + font_family, + font_weight, + font_style ) ) From 4ed503c5edd6877fb397964db21bcfdb63a2b9d3 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Sat, 4 Jan 2020 11:29:19 +0100 Subject: [PATCH 09/12] Subset font when embedding in SVG --- wordcloud/wordcloud.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 1af45e11b..b7539951c 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -737,14 +737,16 @@ def to_svg(self, embed_image=False, embed_font=False): Font is assumed to be available to the SVG reader. Otherwise, text coordinates may produce artifacts when rendered with replacement font. - It is also possible to include the original font in WOFF format using - `embed_font`. + It is also possible to include a subset of the original font in WOFF + format using `embed_font` (requires `fontTools`). Note that some renderers do not handle glyphs the same way, and may differ from `to_image` result. In particular, handwriting-like fonts (e.g. Segoe Script) ligatures might not be properly rendered, which could introduce discrepancies in tight layouts. + Contour drawing is not supported. + Parameters ---------- embed_image : bool, default=False @@ -761,7 +763,6 @@ def to_svg(self, embed_image=False, embed_font=False): """ # TODO should add option to specify URL for font (i.e. 
WOFF file) - # TODO when embedding, we should try to embed only a subset # Make sure layout is generated self._check_generated() @@ -785,8 +786,9 @@ def to_svg(self, embed_image=False, embed_font=False): # Get font information font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale)) raw_font_family, raw_font_style = font.getname() - # TODO properly escape/quote this name + # TODO properly escape/quote this name? font_family = repr(raw_font_family) + # TODO better support for uncommon font styles/weights? raw_font_style = raw_font_style.lower() if 'bold' in raw_font_style: font_weight = 'bold' @@ -817,12 +819,28 @@ def to_svg(self, embed_image=False, embed_font=False): # Import here, to avoid hard dependency on fonttools import fontTools - import fontTools.subset as subset + import fontTools.subset + + # Subset options + options = fontTools.subset.Options( + + # Small impact on character shapes, but reduce size a lot + hinting=False, + + # On small subsets, can improve size + desubroutinize=True, + + # Try to be lenient + ignore_missing_glyphs=True, + ) # Load and subset font - options = subset.Options() - ttf = subset.load_font(self.font_path, options) - # TODO do subset + ttf = fontTools.subset.load_font(self.font_path, options) + subsetter = fontTools.subset.Subsetter(options) + characters = {c for item in self.layout_ for c in item[0][0]} + text = ''.join(characters) + subsetter.populate(text=text) + subsetter.subset(ttf) # Export as WOFF # TODO is there a better method, i.e. directly export to WOFF? 
From 674e0fa8cd865e015b812ff6f87c3be169817bb6 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Sat, 4 Jan 2020 11:37:23 +0100 Subject: [PATCH 10/12] Add option for font optimization --- wordcloud/wordcloud.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index b7539951c..434ffc53f 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -732,7 +732,7 @@ def __array__(self): def to_html(self): raise NotImplementedError("FIXME!!!") - def to_svg(self, embed_image=False, embed_font=False): + def to_svg(self, embed_font=False, optimize_embedded_font=True, embed_image=False): """Export to SVG. Font is assumed to be available to the SVG reader. Otherwise, text @@ -749,13 +749,18 @@ def to_svg(self, embed_image=False, embed_font=False): Parameters ---------- + embed_font : bool, default=False + Whether to include font inside resulting SVG file. + + optimize_embedded_font : bool, default=True + Whether to be aggressive when embedding a font, to reduce size. In + particular, hinting tables are dropped, which may introduces slight + changes to character shapes (w.r.t. `to_image` baseline). + embed_image : bool, default=False Whether to include rasterized image inside resulting SVG file. Useful for debugging. - embed_font : bool, default=False - Whether to include font inside resulting SVG file. 
- Returns ------- content : string @@ -825,10 +830,10 @@ def to_svg(self, embed_image=False, embed_font=False): options = fontTools.subset.Options( # Small impact on character shapes, but reduce size a lot - hinting=False, + hinting=not optimize_embedded_font, # On small subsets, can improve size - desubroutinize=True, + desubroutinize=optimize_embedded_font, # Try to be lenient ignore_missing_glyphs=True, From 289fef597fbfed325db577a98ba06809fb0b1fa5 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Tue, 7 Jan 2020 23:36:37 +0100 Subject: [PATCH 11/12] Update to_svg documentation about Complex Text Layout --- wordcloud/wordcloud.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py index 434ffc53f..a026bb6e8 100644 --- a/wordcloud/wordcloud.py +++ b/wordcloud/wordcloud.py @@ -738,12 +738,22 @@ def to_svg(self, embed_font=False, optimize_embedded_font=True, embed_image=Fals Font is assumed to be available to the SVG reader. Otherwise, text coordinates may produce artifacts when rendered with replacement font. It is also possible to include a subset of the original font in WOFF - format using `embed_font` (requires `fontTools`). + format using ``embed_font`` (requires `fontTools`). Note that some renderers do not handle glyphs the same way, and may - differ from `to_image` result. In particular, handwriting-like fonts - (e.g. Segoe Script) ligatures might not be properly rendered, which - could introduce discrepancies in tight layouts. + differ from ``to_image`` result. In particular, Complex Text Layout may + not be supported. In this typesetting, the shape or positioning of a + grapheme depends on its relation to other graphemes. + + Pillow, since version 4.2.0, supports CTL using ``libraqm``. However, + due to dependencies, this feature is not always enabled. Hence, the + same rendering differences may appear in ``to_image``. 
As this + rasterized output is used to compute the layout, this also affects the + layout generation. Use ``PIL.features.check`` to test availability of + ``raqm``. + + Consistent rendering is therefore expected if both Pillow and the SVG + renderer have the same support of CTL. Contour drawing is not supported. @@ -754,7 +764,7 @@ def to_svg(self, embed_font=False, optimize_embedded_font=True, embed_image=Fals optimize_embedded_font : bool, default=True Whether to be aggressive when embedding a font, to reduce size. In - particular, hinting tables are dropped, which may introduces slight + particular, hinting tables are dropped, which may introduce slight changes to character shapes (w.r.t. `to_image` baseline). embed_image : bool, default=False From 2c521b985eceee5017efaea07e53a238484c4c24 Mon Sep 17 00:00:00 2001 From: Jojo le Barjos Date: Wed, 8 Jan 2020 09:10:20 +0100 Subject: [PATCH 12/12] Add smoke test for to_svg --- test/test_wordcloud.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_wordcloud.py b/test/test_wordcloud.py index 88bbadded..b5bc868d7 100644 --- a/test/test_wordcloud.py +++ b/test/test_wordcloud.py @@ -6,6 +6,7 @@ from random import Random from numpy.testing import assert_array_equal from PIL import Image +import xml.etree.ElementTree as ET import matplotlib matplotlib.use('Agg') @@ -154,6 +155,13 @@ def test_check_errors(): assert "call generate" in str(e) +def test_svg_syntax(): + wc = WordCloud() + wc.generate(THIS) + svg = wc.to_svg() + ET.fromstring(svg) + + def test_recolor(): wc = WordCloud(max_words=50, colormap="jet") wc.generate(THIS)