From 2f130bc055c892480d99782fad8e3beaf27bbc16 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Mon, 23 Dec 2019 16:17:06 +0100
Subject: [PATCH 01/12] Add to_svg prototype, using freetype-py to get metrics

---
 wordcloud/wordcloud.py | 127 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 83b5590d4..3fc004764 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -728,6 +728,133 @@ def __array__(self):
 
     def to_html(self):
         raise NotImplementedError("FIXME!!!")
+    
+    def to_svg(self):
+        """Export to SVG.
+
+        Returns
+        -------
+        content : string
+            Word cloud image as SVG string
+        """
+
+        # Make sure layout is generated
+        self._check_generated()
+
+        # Get output size, in pixels
+        if self.mask is not None:
+            width = self.mask.shape[1]
+            height = self.mask.shape[0]
+        else:
+            height, width = self.height, self.width
+
+        # Import locally, to make this dependency optional
+        # TODO try to use Pillow's bindings instead, to avoid additional dependency
+        import freetype
+
+        # Use FreeType to analyze font
+        face = freetype.Face(self.font_path)
+
+        # Compute text bounding box
+        # TODO maybe promote this as a full-fledged internal method
+        def compute_box(text):
+            previous_char = 0
+            pen_x, pen_y = 0, 0
+            min_x, min_y = 0, 0
+            max_x, max_y = 0, 0
+            for char in text:
+                face.load_char(char, freetype.FT_LOAD_RENDER)
+                kerning = face.get_kerning(previous_char, char)
+                previous_char = char
+
+                # Get raw bitmap
+                width  = face.glyph.bitmap.width
+                rows   = face.glyph.bitmap.rows
+                top    = face.glyph.bitmap_top
+                left   = face.glyph.bitmap_left
+
+                # Apply character bounding box
+                pen_x += kerning.x
+                x0 = (pen_x >> 6) + left
+                x1 = x0 + width
+                y0 = (pen_y >> 6) - (rows - top)
+                y1 = y0 + rows
+
+                # Update global bounding box
+                min_x, max_x = min(min_x, x0), max(max_x, x1)
+                min_y, max_y = min(min_y, y0), max(max_y, y1)
+
+                # Move pointer
+                pen_x += face.glyph.advance.x
+                pen_y += face.glyph.advance.y
+
+            return min_x, min_y, max_x, max_y
+
+        # Text buffer
+        result = []
+
+        # Prepare global style
+        style = {}
+        # TODO properly escape/quote this
+        # TODO should add option to specify URL for font (i.e. WOFF file)
+        # TODO should maybe add option to embed font in SVG file
+        style['font-family'] = repr(face.family_name.decode('utf-8'))
+        if face.style_flags & freetype.FT_STYLE_FLAG_BOLD:
+            style['font-weight'] = 'bold'
+        if face.style_flags & freetype.FT_STYLE_FLAG_ITALIC:
+            style['font-weight'] = 'italic'
+        style = ';'.join(':'.join(pair) for pair in style.items())
+
+        # Add header
+        result.append(
+            '<svg'
+            ' xmlns="http://www.w3.org/2000/svg"'
+            ' width="{}"'
+            ' height="{}"'
+            ' style="{}"'
+            '>'
+            .format(width, height, style)
+        )
+
+        # Add background
+        if self.background_color is not None:
+            result.append(
+                '<rect'
+                ' width="100%"'
+                ' height="100%"'
+                ' style="fill:{}"'
+                '>'
+                '</rect>'
+                .format(self.background_color)
+            )
+
+        # For each word in layout
+        for (word, count), font_size, (y, x), orientation, color in self.layout_:
+
+            # Compute text bounding box
+            face.set_char_size(font_size * 64)
+            min_x, min_y, max_x, max_y = compute_box(word)
+
+            # Compute text attributes
+            attributes = {}
+            if orientation == Image.ROTATE_90:
+                x += max_y
+                y += max_x - min_x
+                attributes['transform'] = 'translate({},{}) rotate(-90)'.format(x, y)
+            else:
+                x += min_x
+                y += max_y
+                attributes['transform'] = 'translate({},{})'.format(x, y)
+            attributes['font-size'] = '{}'.format(font_size)
+            attributes['style'] = 'fill:{}'.format(color)
+
+            # Create node
+            attributes = ' '.join('{}="{}"'.format(k, v) for k, v in attributes.items())
+            result.append('<text {}>{}</text>'.format(attributes, word))
+
+        # Complete SVG file
+        result.append('</svg>')
+        return '\n'.join(result)
 
     def _get_bolean_mask(self, mask):
         """Cast to two dimensional boolean mask."""

From 2030ff90659a6e21e26c932f9afacc3c613f4e51 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Mon, 23 Dec 2019 22:54:59 +0100
Subject: [PATCH 02/12] Use PIL bindings instead of freetype-py

---
 wordcloud/wordcloud.py | 73 +++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 48 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 3fc004764..d15e5210b 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -742,67 +742,37 @@ def to_svg(self):
         self._check_generated()
 
         # Get output size, in pixels
+        # TODO check self.scale
         if self.mask is not None:
             width = self.mask.shape[1]
             height = self.mask.shape[0]
         else:
             height, width = self.height, self.width
-
-        # Import locally, to make this dependency optional
-        # TODO try to use Pillow's bindings instead, to avoid additional dependency
-        import freetype
-
-        # Use FreeType to analyze font
-        face = freetype.Face(self.font_path)
-
-        # Compute text bounding box
-        # TODO maybe promote this as a full-fledged internal method
-        def compute_box(text):
-            previous_char = 0
-            pen_x, pen_y = 0, 0
-            min_x, min_y = 0, 0
-            max_x, max_y = 0, 0
-            for char in text:
-                face.load_char(char, freetype.FT_LOAD_RENDER)
-                kerning = face.get_kerning(previous_char, char)
-                previous_char = char
-
-                # Get raw bitmap
-                width  = face.glyph.bitmap.width
-                rows   = face.glyph.bitmap.rows
-                top    = face.glyph.bitmap_top
-                left   = face.glyph.bitmap_left
-
-                # Apply character bounding box
-                pen_x += kerning.x
-                x0 = (pen_x >> 6) + left
-                x1 = x0 + width
-                y0 = (pen_y >> 6) - (rows - top)
-                y1 = y0 + rows
-
-                # Update global bounding box
-                min_x, max_x = min(min_x, x0), max(max_x, x1)
-                min_y, max_y = min(min_y, y0), max(max_y, y1)
-
-                # Move pointer
-                pen_x += face.glyph.advance.x
-                pen_y += face.glyph.advance.y
-
-            return min_x, min_y, max_x, max_y
+        
+        # Get max font size
+        if self.max_font_size is None:
+            max_font_size = max(w[1] for w in self.layout_)
+        else:
+            max_font_size = self.max_font_size
 
         # Text buffer
         result = []
 
         # Prepare global style
         style = {}
+        font = ImageFont.truetype(self.font_path, int(max_font_size))
+        font_family, font_style = font.getname()
         # TODO properly escape/quote this
         # TODO should add option to specify URL for font (i.e. WOFF file)
         # TODO should maybe add option to embed font in SVG file
-        style['font-family'] = repr(face.family_name.decode('utf-8'))
-        if face.style_flags & freetype.FT_STYLE_FLAG_BOLD:
+        style['font-family'] = repr(font_family)
+        font_style = font_style.lower()
+        if 'bold' in font_style:
             style['font-weight'] = 'bold'
-        if face.style_flags & freetype.FT_STYLE_FLAG_ITALIC:
-            style['font-weight'] = 'italic'
+        if 'italic' in font_style:
+            style['font-style'] = 'italic'
+        elif 'oblique' in font_style:
+            style['font-style'] = 'oblique'
         style = ';'.join(':'.join(pair) for pair in style.items())
 
         # Add header
@@ -831,9 +801,16 @@ def compute_box(text):
         # For each word in layout
         for (word, count), font_size, (y, x), orientation, color in self.layout_:
 
+            # Get text metrics
+            font = ImageFont.truetype(self.font_path, int(font_size))
+            (size_x, size_y), (offset_x, offset_y) = font.font.getsize(word)
+            ascent, descent = font.getmetrics()
+            
             # Compute text bounding box
-            face.set_char_size(font_size * 64)
-            min_x, min_y, max_x, max_y = compute_box(word)
+            min_x = offset_x
+            max_x = size_x - offset_x
+            min_y = ascent - size_y
+            max_y = ascent - offset_y
 
             # Compute text attributes
             attributes = {}

From ad12167f6157d75018229cbdaa978a5dbc5cf5aa Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Mon, 23 Dec 2019 23:02:02 +0100
Subject: [PATCH 03/12] Handle scale attribute

---
 wordcloud/wordcloud.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index d15e5210b..0067f5564 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -742,13 +742,12 @@ def to_svg(self):
         self._check_generated()
 
         # Get output size, in pixels
-        # TODO check self.scale
         if self.mask is not None:
             width = self.mask.shape[1]
             height = self.mask.shape[0]
         else:
             height, width = self.height, self.width
-        
+
         # Get max font size
         if self.max_font_size is None:
             max_font_size = max(w[1] for w in self.layout_)
@@ -760,7 +759,7 @@ def to_svg(self):
 
         # Prepare global style
         style = {}
-        font = ImageFont.truetype(self.font_path, int(max_font_size))
+        font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale))
         font_family, font_style = font.getname()
         # TODO properly escape/quote this
         # TODO should add option to specify URL for font (i.e. WOFF file)
@@ -783,7 +782,7 @@ def to_svg(self):
             ' height="{}"'
             ' style="{}"'
             '>'
-            .format(width, height, style)
+            .format(width * self.scale, height * self.scale, style)
         )
 
         # Add background
@@ -800,12 +799,14 @@ def to_svg(self):
 
         # For each word in layout
         for (word, count), font_size, (y, x), orientation, color in self.layout_:
+            x *= self.scale
+            y *= self.scale
 
             # Get text metrics
-            font = ImageFont.truetype(self.font_path, int(font_size))
+            font = ImageFont.truetype(self.font_path, int(font_size * self.scale))
             (size_x, size_y), (offset_x, offset_y) = font.font.getsize(word)
             ascent, descent = font.getmetrics()
-            
+
             # Compute text bounding box
             min_x = offset_x
             max_x = size_x - offset_x
@@ -822,7 +823,7 @@ def to_svg(self):
                 x += min_x
                 y += max_y
                 attributes['transform'] = 'translate({},{})'.format(x, y)
-            attributes['font-size'] = '{}'.format(font_size)
+            attributes['font-size'] = '{}'.format(font_size * self.scale)
             attributes['style'] = 'fill:{}'.format(color)
 
             # Create node

From 28ac67c5f5a3af54f41479ad36371e0301f09518 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Tue, 24 Dec 2019 20:30:14 +0100
Subject: [PATCH 04/12] Small fix in offset

---
 wordcloud/wordcloud.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 0067f5564..7842e7450 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -761,9 +761,10 @@ def to_svg(self):
         style = {}
         font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale))
         font_family, font_style = font.getname()
-        # TODO properly escape/quote this
+        # TODO properly escape/quote this name
         # TODO should add option to specify URL for font (i.e. WOFF file)
         # TODO should maybe add option to embed font in SVG file
+        # TODO when embedding, we should try to embed only a subset
         style['font-family'] = repr(font_family)
         font_style = font_style.lower()
         if 'bold' in font_style:
@@ -801,14 +802,15 @@ def to_svg(self):
         for (word, count), font_size, (y, x), orientation, color in self.layout_:
             x *= self.scale
             y *= self.scale
-
+            
             # Get text metrics
             font = ImageFont.truetype(self.font_path, int(font_size * self.scale))
             (size_x, size_y), (offset_x, offset_y) = font.font.getsize(word)
             ascent, descent = font.getmetrics()
 
             # Compute text bounding box
-            min_x = offset_x
+            # TODO some browser do not render glyphs the same way (e.g. Segoe Script in Chrome is different, while in Internet Explorer it matches to_image)
+            min_x = -offset_x
             max_x = size_x - offset_x
             min_y = ascent - size_y
             max_y = ascent - offset_y
@@ -830,6 +832,8 @@ def to_svg(self):
             attributes = ' '.join('{}="{}"'.format(k, v) for k, v in attributes.items())
             result.append('<text {}>{}</text>'.format(attributes, word))
 
+        # TODO draw contour
+
         # Complete SVG file
         result.append('</svg>')
         return '\n'.join(result)

From a71825edacca7795b6b27d801ed671474107ee8a Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Tue, 24 Dec 2019 20:44:34 +0100
Subject: [PATCH 05/12] Add option to embed image in SVG, useful for debug

---
 wordcloud/wordcloud.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 7842e7450..11df1af1e 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -9,8 +9,10 @@
 
 import warnings
 from random import Random
+import io
 import os
 import re
+import base64
 import sys
 import colorsys
 import matplotlib
@@ -729,7 +731,7 @@ def __array__(self):
     def to_html(self):
         raise NotImplementedError("FIXME!!!")
     
-    def to_svg(self):
+    def to_svg(self, embed_image=False):
         """Export to SVG.
 
         Returns
@@ -797,6 +799,21 @@ def to_svg(self):
                 '</rect>'
                 .format(self.background_color)
             )
+        
+        # Embed image, useful for debug purpose
+        if embed_image:
+            image = self.to_image()
+            data = io.BytesIO()
+            image.save(data, format='JPEG')
+            data = base64.b64encode(data.getbuffer()).decode('ascii')
+            result.append(
+                '<image'
+                ' width="100%"'
+                ' height="100%"'
+                ' href="data:image/jpg;base64,{}"'
+                '/>'
+                .format(data)
+            )
 
         # For each word in layout
         for (word, count), font_size, (y, x), orientation, color in self.layout_:

From 49af1e9dca932e63c2d16dee64847dcc7485b554 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Sun, 29 Dec 2019 22:32:43 +0100
Subject: [PATCH 06/12] Fix documentation and code style

---
 wordcloud/wordcloud.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 11df1af1e..650963594 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -730,10 +730,24 @@ def __array__(self):
 
     def to_html(self):
         raise NotImplementedError("FIXME!!!")
-    
+
     def to_svg(self, embed_image=False):
         """Export to SVG.
 
+        Font is assumed to be available to the SVG reader. Otherwise, text
+        coordinates may produce artifacts when rendered with replacement font.
+
+        Note that some renderers do not handle glyphs the same way, and may
+        differ from `to_image` result. In particular, handwriting-like fonts
+        (e.g. Segoe Script) ligatures might not be properly rendered, which
+        could introduce discrepancies in tight layouts.
+
+        Parameters
+        ----------
+        embed_image : bool, default=False
+            Whether to include rasterized image inside resulting SVG file.
+            Useful for debugging.
+
         Returns
         -------
         content : string
@@ -799,7 +813,7 @@ def to_svg(self, embed_image=False):
                 '</rect>'
                 .format(self.background_color)
             )
-        
+
         # Embed image, useful for debug purpose
         if embed_image:
             image = self.to_image()
@@ -819,17 +833,15 @@ def to_svg(self, embed_image=False):
         for (word, count), font_size, (y, x), orientation, color in self.layout_:
             x *= self.scale
             y *= self.scale
-            
+
             # Get text metrics
             font = ImageFont.truetype(self.font_path, int(font_size * self.scale))
             (size_x, size_y), (offset_x, offset_y) = font.font.getsize(word)
             ascent, descent = font.getmetrics()
 
             # Compute text bounding box
-            # TODO some browser do not render glyphs the same way (e.g. Segoe Script in Chrome is different, while in Internet Explorer it matches to_image)
             min_x = -offset_x
             max_x = size_x - offset_x
-            min_y = ascent - size_y
             max_y = ascent - offset_y
 
             # Compute text attributes

From 061ee0671dabb0b19f99f3e336d8b5c52dcf6b17 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Sun, 29 Dec 2019 23:15:20 +0100
Subject: [PATCH 07/12] Properly escape words in XML

---
 wordcloud/wordcloud.py | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 650963594..595efe100 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -18,6 +18,7 @@
 import matplotlib
 import numpy as np
 from operator import itemgetter
+from xml.sax import saxutils
 
 from PIL import Image
 from PIL import ImageColor
@@ -754,6 +755,10 @@ def to_svg(self, embed_image=False):
             Word cloud image as SVG string
         """
 
+        # TODO should add option to specify URL for font (i.e. WOFF file)
+        # TODO should maybe add option to embed font in SVG file
+        # TODO when embedding, we should try to embed only a subset
+
         # Make sure layout is generated
         self._check_generated()
 
@@ -778,9 +783,6 @@ def to_svg(self, embed_image=False):
         font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale))
         font_family, font_style = font.getname()
         # TODO properly escape/quote this name
-        # TODO should add option to specify URL for font (i.e. WOFF file)
-        # TODO should maybe add option to embed font in SVG file
-        # TODO when embedding, we should try to embed only a subset
         style['font-family'] = repr(font_family)
         font_style = font_style.lower()
         if 'bold' in font_style:
@@ -797,9 +799,13 @@ def to_svg(self, embed_image=False):
             ' xmlns="http://www.w3.org/2000/svg"'
             ' width="{}"'
             ' height="{}"'
-            ' style="{}"'
+            ' style={}'
             '>'
-            .format(width * self.scale, height * self.scale, style)
+            .format(
+                width * self.scale,
+                height * self.scale,
+                saxutils.quoteattr(style)
+            )
         )
 
         # Add background
@@ -849,17 +855,29 @@ def to_svg(self, embed_image=False):
             if orientation == Image.ROTATE_90:
                 x += max_y
                 y += max_x - min_x
-                attributes['transform'] = 'translate({},{}) rotate(-90)'.format(x, y)
+                transform = 'translate({},{}) rotate(-90)'.format(x, y)
             else:
                 x += min_x
                 y += max_y
-                attributes['transform'] = 'translate({},{})'.format(x, y)
-            attributes['font-size'] = '{}'.format(font_size * self.scale)
-            attributes['style'] = 'fill:{}'.format(color)
+                transform = 'translate({},{})'.format(x, y)
 
             # Create node
             attributes = ' '.join('{}="{}"'.format(k, v) for k, v in attributes.items())
-            result.append('<text {}>{}</text>'.format(attributes, word))
+            result.append(
+                '<text'
+                ' transform="{}"'
+                ' font-size="{}"'
+                ' style="fill:{}"'
+                '>'
+                '{}'
+                '</text>'
+                .format(
+                    transform,
+                    font_size * self.scale,
+                    color,
+                    saxutils.escape(word)
+                )
+            )
 
         # TODO draw contour
 

From 4ccece5643910f66571921e6a3129c409dd96be2 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Fri, 3 Jan 2020 13:42:18 +0100
Subject: [PATCH 08/12] Add embed_font option for to_svg

---
 wordcloud/wordcloud.py | 96 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 79 insertions(+), 17 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 595efe100..1af45e11b 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -732,11 +732,13 @@ def __array__(self):
     def to_html(self):
         raise NotImplementedError("FIXME!!!")
 
-    def to_svg(self, embed_image=False):
+    def to_svg(self, embed_image=False, embed_font=False):
         """Export to SVG.
 
         Font is assumed to be available to the SVG reader. Otherwise, text
         coordinates may produce artifacts when rendered with replacement font.
+        It is also possible to include the original font in WOFF format using
+        `embed_font`.
 
         Note that some renderers do not handle glyphs the same way, and may
         differ from `to_image` result. In particular, handwriting-like fonts
@@ -749,6 +751,9 @@ def to_svg(self, embed_image=False):
             Whether to include rasterized image inside resulting SVG file.
             Useful for debugging.
 
+        embed_font : bool, default=False
+            Whether to include font inside resulting SVG file.
+
         Returns
         -------
         content : string
@@ -756,7 +761,6 @@ def to_svg(self, embed_image=False):
         """
 
         # TODO should add option to specify URL for font (i.e. WOFF file)
-        # TODO should maybe add option to embed font in SVG file
         # TODO when embedding, we should try to embed only a subset
 
         # Make sure layout is generated
@@ -778,20 +782,22 @@ def to_svg(self, embed_image=False):
         # Text buffer
         result = []
 
-        # Prepare global style
-        style = {}
+        # Get font information
         font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale))
-        font_family, font_style = font.getname()
+        raw_font_family, raw_font_style = font.getname()
         # TODO properly escape/quote this name
-        style['font-family'] = repr(font_family)
-        font_style = font_style.lower()
-        if 'bold' in font_style:
-            style['font-weight'] = 'bold'
-        if 'italic' in font_style:
-            style['font-style'] = 'italic'
-        elif 'oblique' in font_style:
-            style['font-style'] = 'oblique'
-        style = ';'.join(':'.join(pair) for pair in style.items())
+        font_family = repr(raw_font_family)
+        raw_font_style = raw_font_style.lower()
+        if 'bold' in raw_font_style:
+            font_weight = 'bold'
+        else:
+            font_weight = 'normal'
+        if 'italic' in raw_font_style:
+            font_style = 'italic'
+        elif 'oblique' in raw_font_style:
+            font_style = 'oblique'
+        else:
+            font_style = 'normal'
 
         # Add header
         result.append(
@@ -799,12 +805,68 @@ def to_svg(self, embed_image=False):
             ' xmlns="http://www.w3.org/2000/svg"'
             ' width="{}"'
             ' height="{}"'
-            ' style={}'
             '>'
             .format(
                 width * self.scale,
-                height * self.scale,
-                saxutils.quoteattr(style)
+                height * self.scale
+            )
+        )
+
+        # Embed font, if requested
+        if embed_font:
+
+            # Import here, to avoid hard dependency on fonttools
+            import fontTools
+            import fontTools.subset as subset
+
+            # Load and subset font
+            options = subset.Options()
+            ttf = subset.load_font(self.font_path, options)
+            # TODO do subset
+
+            # Export as WOFF
+            # TODO is there a better method, i.e. directly export to WOFF?
+            buffer = io.BytesIO()
+            ttf.saveXML(buffer)
+            buffer.seek(0)
+            woff = fontTools.ttLib.TTFont(flavor='woff')
+            woff.importXML(buffer)
+
+            # Create stylesheet with embedded font face
+            buffer = io.BytesIO()
+            woff.save(buffer)
+            data = base64.b64encode(buffer.getbuffer()).decode('ascii')
+            url = 'data:application/font-woff;charset=utf-8;base64,' + data
+            result.append(
+                '<style>'
+                '@font-face{{'
+                'font-family:{};'
+                'font-weight:{};'
+                'font-style:{};'
+                'src:url("{}")format("woff");'
+                '}}'
+                '</style>'
+                .format(
+                    font_family,
+                    font_weight,
+                    font_style,
+                    url
+                )
+            )
+
+        # Select global style
+        result.append(
+            '<style>'
+            'text{{'
+            'font-family:{};'
+            'font-weight:{};'
+            'font-style:{};'
+            '}}'
+            '</style>'
+            .format(
+                font_family,
+                font_weight,
+                font_style
             )
         )
 

From 4ed503c5edd6877fb397964db21bcfdb63a2b9d3 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Sat, 4 Jan 2020 11:29:19 +0100
Subject: [PATCH 09/12] Subset font when embedding in SVG

---
 wordcloud/wordcloud.py | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 1af45e11b..b7539951c 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -737,14 +737,16 @@ def to_svg(self, embed_image=False, embed_font=False):
 
         Font is assumed to be available to the SVG reader. Otherwise, text
         coordinates may produce artifacts when rendered with replacement font.
-        It is also possible to include the original font in WOFF format using
-        `embed_font`.
+        It is also possible to include a subset of the original font in WOFF
+        format using `embed_font` (requires `fontTools`).
 
         Note that some renderers do not handle glyphs the same way, and may
         differ from `to_image` result. In particular, handwriting-like fonts
         (e.g. Segoe Script) ligatures might not be properly rendered, which
         could introduce discrepancies in tight layouts.
 
+        Contour drawing is not supported.
+
         Parameters
         ----------
         embed_image : bool, default=False
@@ -761,7 +763,6 @@ def to_svg(self, embed_image=False, embed_font=False):
         """
 
         # TODO should add option to specify URL for font (i.e. WOFF file)
-        # TODO when embedding, we should try to embed only a subset
 
         # Make sure layout is generated
         self._check_generated()
@@ -785,8 +786,9 @@ def to_svg(self, embed_image=False, embed_font=False):
         # Get font information
         font = ImageFont.truetype(self.font_path, int(max_font_size * self.scale))
         raw_font_family, raw_font_style = font.getname()
-        # TODO properly escape/quote this name
+        # TODO properly escape/quote this name?
         font_family = repr(raw_font_family)
+        # TODO better support for uncommon font styles/weights?
         raw_font_style = raw_font_style.lower()
         if 'bold' in raw_font_style:
             font_weight = 'bold'
@@ -817,12 +819,28 @@ def to_svg(self, embed_image=False, embed_font=False):
 
             # Import here, to avoid hard dependency on fonttools
             import fontTools
-            import fontTools.subset as subset
+            import fontTools.subset
+
+            # Subset options
+            options = fontTools.subset.Options(
+
+                # Small impact on character shapes, but reduce size a lot
+                hinting=False,
+
+                # On small subsets, can improve size
+                desubroutinize=True,
+
+                # Try to be lenient
+                ignore_missing_glyphs=True,
+            )
 
             # Load and subset font
-            options = subset.Options()
-            ttf = subset.load_font(self.font_path, options)
-            # TODO do subset
+            ttf = fontTools.subset.load_font(self.font_path, options)
+            subsetter = fontTools.subset.Subsetter(options)
+            characters = {c for item in self.layout_ for c in item[0][0]}
+            text = ''.join(characters)
+            subsetter.populate(text=text)
+            subsetter.subset(ttf)
 
             # Export as WOFF
             # TODO is there a better method, i.e. directly export to WOFF?

From 674e0fa8cd865e015b812ff6f87c3be169817bb6 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Sat, 4 Jan 2020 11:37:23 +0100
Subject: [PATCH 10/12] Add option for font optimization

---
 wordcloud/wordcloud.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index b7539951c..434ffc53f 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -732,7 +732,7 @@ def __array__(self):
     def to_html(self):
         raise NotImplementedError("FIXME!!!")
 
-    def to_svg(self, embed_image=False, embed_font=False):
+    def to_svg(self, embed_font=False, optimize_embedded_font=True, embed_image=False):
         """Export to SVG.
 
         Font is assumed to be available to the SVG reader. Otherwise, text
@@ -749,13 +749,18 @@ def to_svg(self, embed_image=False, embed_font=False):
 
         Parameters
         ----------
+        embed_font : bool, default=False
+            Whether to include font inside resulting SVG file.
+
+        optimize_embedded_font : bool, default=True
+            Whether to be aggressive when embedding a font, to reduce size. In
+            particular, hinting tables are dropped, which may introduces slight
+            changes to character shapes (w.r.t. `to_image` baseline).
+
         embed_image : bool, default=False
             Whether to include rasterized image inside resulting SVG file.
             Useful for debugging.
 
-        embed_font : bool, default=False
-            Whether to include font inside resulting SVG file.
-
         Returns
         -------
         content : string
@@ -825,10 +830,10 @@ def to_svg(self, embed_image=False, embed_font=False):
             options = fontTools.subset.Options(
 
                 # Small impact on character shapes, but reduce size a lot
-                hinting=False,
+                hinting=not optimize_embedded_font,
 
                 # On small subsets, can improve size
-                desubroutinize=True,
+                desubroutinize=optimize_embedded_font,
 
                 # Try to be lenient
                 ignore_missing_glyphs=True,

From 289fef597fbfed325db577a98ba06809fb0b1fa5 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Tue, 7 Jan 2020 23:36:37 +0100
Subject: [PATCH 11/12] Update to_svg documentation about Complex Text Layout

---
 wordcloud/wordcloud.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/wordcloud/wordcloud.py b/wordcloud/wordcloud.py
index 434ffc53f..a026bb6e8 100644
--- a/wordcloud/wordcloud.py
+++ b/wordcloud/wordcloud.py
@@ -738,12 +738,22 @@ def to_svg(self, embed_font=False, optimize_embedded_font=True, embed_image=Fals
         Font is assumed to be available to the SVG reader. Otherwise, text
         coordinates may produce artifacts when rendered with replacement font.
         It is also possible to include a subset of the original font in WOFF
-        format using `embed_font` (requires `fontTools`).
+        format using ``embed_font`` (requires `fontTools`).
 
         Note that some renderers do not handle glyphs the same way, and may
-        differ from `to_image` result. In particular, handwriting-like fonts
-        (e.g. Segoe Script) ligatures might not be properly rendered, which
-        could introduce discrepancies in tight layouts.
+        differ from ``to_image`` result. In particular, Complex Text Layout may
+        not be supported. In this typesetting, the shape or positioning of a
+        grapheme depends on its relation to other graphemes.
+
+        Pillow, since version 4.2.0, supports CTL using ``libraqm``. However,
+        due to dependencies, this feature is not always enabled. Hence, the
+        same rendering differences may appear in ``to_image``. As this
+        rasterized output is used to compute the layout, this also affects the
+        layout generation. Use ``PIL.features.check`` to test availability of
+        ``raqm``.
+
+        Consistent rendering is therefore expected if both Pillow and the SVG
+        renderer have the same level of CTL support.
 
         Contour drawing is not supported.
 
@@ -754,7 +764,7 @@ def to_svg(self, embed_font=False, optimize_embedded_font=True, embed_image=Fals
 
         optimize_embedded_font : bool, default=True
             Whether to be aggressive when embedding a font, to reduce size. In
-            particular, hinting tables are dropped, which may introduces slight
+            particular, hinting tables are dropped, which may introduce slight
             changes to character shapes (w.r.t. `to_image` baseline).
 
         embed_image : bool, default=False

From 2c521b985eceee5017efaea07e53a238484c4c24 Mon Sep 17 00:00:00 2001
From: Jojo le Barjos <jojolebarjos@gmail.com>
Date: Wed, 8 Jan 2020 09:10:20 +0100
Subject: [PATCH 12/12] Add smoke test for to_svg

---
 test/test_wordcloud.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/test_wordcloud.py b/test/test_wordcloud.py
index 88bbadded..b5bc868d7 100644
--- a/test/test_wordcloud.py
+++ b/test/test_wordcloud.py
@@ -6,6 +6,7 @@
 from random import Random
 from numpy.testing import assert_array_equal
 from PIL import Image
+import xml.etree.ElementTree as ET
 
 import matplotlib
 matplotlib.use('Agg')
@@ -154,6 +155,13 @@ def test_check_errors():
         assert "call generate" in str(e)
 
 
+def test_svg_syntax():
+    wc = WordCloud()
+    wc.generate(THIS)
+    svg = wc.to_svg()
+    ET.fromstring(svg)
+
+
 def test_recolor():
     wc = WordCloud(max_words=50, colormap="jet")
     wc.generate(THIS)