From 0c550b75ceec0c302f78fab4a47d7dde3c534a59 Mon Sep 17 00:00:00 2001
From: Jack Stouffer <jack@jackstouffer.com>
Date: Fri, 22 Jul 2016 10:46:40 -0400
Subject: [PATCH 1/6] Make std.utf.toUTF8 DRY by using byChar internally

---
 std/utf.d | 73 +++++++++++++++----------------------------------------
 1 file changed, 19 insertions(+), 54 deletions(-)

diff --git a/std/utf.d b/std/utf.d
index 84b25dbda9a..99fe4007237 100644
--- a/std/utf.d
+++ b/std/utf.d
@@ -2462,67 +2462,32 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe
     }
 }
 
-/*******************
- * Encodes string $(D_PARAM s) into UTF-8 and returns the encoded string.
+/**
+ * Encodes string `s` into UTF-8 and returns the encoded string.
+ *
+ * Params:
+ *     s = the string to encode
+ * Returns:
+ *     A UTF-8 string
+ * See_Also:
+ *     For a lazy, non-allocating version of these functions, see $(LREF byUTF).
  */
-string toUTF8(scope const char[] s) @safe
+string toUTF8(S)(S s) if (isSomeString!S)
 {
-    validate(s);
-    return s.idup;
+    import std.array : array;
+    return s.byChar.array;
 }
 
-/// ditto
-string toUTF8(scope const wchar[] s) @safe
-{
-    char[] r;
-    size_t i;
-    immutable slen = s.length;
-
-    r.length = slen;
-    for (i = 0; i < slen; i++)
-    {
-        immutable c = s[i];
-
-        if (c <= 0x7F)
-            r[i] = cast(char)c;     // fast path for ascii
-        else
-        {
-            r.length = i;
-            while (i < slen)
-                encode(r, decode(s, i));
-            break;
-        }
-    }
-
-    return r;
-}
-
-/// ditto
-string toUTF8(scope const dchar[] s) @safe
+///
+@safe pure unittest
 {
-    char[] r;
-    size_t i;
-    immutable slen = s.length;
-
-    r.length = slen;
-    for (i = 0; i < slen; i++)
-    {
-        immutable c = s[i];
+    import std.algorithm.comparison : equal;
 
-        if (c <= 0x7F)
-            r[i] = cast(char)c;     // fast path for ascii
-        else
-        {
-            r.length = i;
-            foreach (dchar d; s[i .. slen])
-            {
-                encode(r, d);
-            }
-            break;
-        }
-    }
+    // The ø is represented by two UTF-8 code units
+    assert("Hellø"w.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8]));
 
-    return r;
+    // 𐐷 is four code units in UTF-8
+    assert("𐐷"d.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7]));
 }
 
 

From 65c4648c273756f81672120054e19769a4ab0f81 Mon Sep 17 00:00:00 2001
From: Jack Stouffer <jack@jackstouffer.com>
Date: Fri, 22 Jul 2016 10:53:33 -0400
Subject: [PATCH 2/6] Remove over-applied pure attribute from std.utf

---
 std/utf.d | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/std/utf.d b/std/utf.d
index 99fe4007237..8525f82521b 100644
--- a/std/utf.d
+++ b/std/utf.d
@@ -2418,11 +2418,7 @@ void validate(S)(in S str) @safe pure
 }
 
 /* =================== Conversion to UTF8 ======================= */
-
-pure
-{
-
-char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe
+char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
 {
     if (c <= 0x7F)
     {
@@ -2493,7 +2489,7 @@ string toUTF8(S)(S s) if (isSomeString!S)
 
 /* =================== Conversion to UTF16 ======================= */
 
-wchar[] toUTF16(return ref wchar[2] buf, dchar c) nothrow @nogc @safe
+wchar[] toUTF16(return ref wchar[2] buf, dchar c) nothrow @nogc @safe pure
 in
 {
     assert(isValidDchar(c));
@@ -2516,7 +2512,7 @@ body
 /****************
  * Encodes string $(D s) into UTF-16 and returns the encoded string.
  */
-wstring toUTF16(scope const char[] s) @safe
+wstring toUTF16(scope const char[] s) @safe pure
 {
     wchar[] r;
     immutable slen = s.length;
@@ -2542,14 +2538,14 @@ wstring toUTF16(scope const char[] s) @safe
 }
 
 /// ditto
-wstring toUTF16(scope const wchar[] s) @safe
+wstring toUTF16(scope const wchar[] s) @safe pure
 {
     validate(s);
     return s.idup;
 }
 
 /// ditto
-wstring toUTF16(scope const dchar[] s) @safe
+wstring toUTF16(scope const dchar[] s) @safe pure
 {
     wchar[] r;
     immutable slen = s.length;
@@ -2570,7 +2566,7 @@ wstring toUTF16(scope const dchar[] s) @safe
 /*****
  * Encodes string $(D_PARAM s) into UTF-32 and returns the encoded string.
  */
-dstring toUTF32(scope const char[] s) @safe
+dstring toUTF32(scope const char[] s) @safe pure
 {
     dchar[] r;
     immutable slen = s.length;
@@ -2591,7 +2587,7 @@ dstring toUTF32(scope const char[] s) @safe
 }
 
 /// ditto
-dstring toUTF32(scope const wchar[] s) @safe
+dstring toUTF32(scope const wchar[] s) @safe pure
 {
     dchar[] r;
     immutable slen = s.length;
@@ -2612,15 +2608,12 @@ dstring toUTF32(scope const wchar[] s) @safe
 }
 
 /// ditto
-dstring toUTF32(scope const dchar[] s) @safe
+dstring toUTF32(scope const dchar[] s) @safe pure
 {
     validate(s);
     return s.idup;
 }
 
-} // Convert functions are @safe
-
-
 /* =================== toUTFz ======================= */
 
 /++

From 2f24a0aba421ab9705126e718ad4a7b0650c320b Mon Sep 17 00:00:00 2001
From: Jack Stouffer <jack@jackstouffer.com>
Date: Fri, 22 Jul 2016 16:11:10 -0400
Subject: [PATCH 3/6] use appender instead of array

---
 std/utf.d | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/std/utf.d b/std/utf.d
index 8525f82521b..9a4da600d2f 100644
--- a/std/utf.d
+++ b/std/utf.d
@@ -2470,8 +2470,15 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
  */
 string toUTF8(S)(S s) if (isSomeString!S)
 {
-    import std.array : array;
-    return s.byChar.array;
+    import std.array : appender;
+
+    auto app = appender!string();
+    app.reserve(s.length);
+
+    foreach (c; s.byChar)
+        app.put(c);
+
+    return app.data;
 }
 
 ///

From 3cc0a7cba4a0b59cfd34e433201f89b2998cd089 Mon Sep 17 00:00:00 2001
From: Jack Stouffer <jack@jackstouffer.com>
Date: Sat, 23 Jul 2016 17:03:37 -0400
Subject: [PATCH 4/6] Added string specific logic

---
 std/utf.d | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/std/utf.d b/std/utf.d
index 9a4da600d2f..7ad2cdb5524 100644
--- a/std/utf.d
+++ b/std/utf.d
@@ -2470,15 +2470,19 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
  */
 string toUTF8(S)(S s) if (isSomeString!S)
 {
-    import std.array : appender;
-
-    auto app = appender!string();
-    app.reserve(s.length);
-
-    foreach (c; s.byChar)
-        app.put(c);
-
-    return app.data;
+    static if (is(S : string))
+    {
+        return s.idup;
+    }
+    else
+    {
+        import std.array : appender;
+        auto app = appender!string();
+        app.reserve(s.length);
+        foreach (c; s.byUTF2!char)
+            app.put(c);
+        return app.data;
+    }
 }
 
 ///

From ed92b3d33feb8be8a8d29f6be6dc838338c52815 Mon Sep 17 00:00:00 2001
From: Jack Stouffer <jack@jackstouffer.com>
Date: Sat, 23 Jul 2016 17:06:04 -0400
Subject: [PATCH 5/6] Range-ified

---
 std/utf.d | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/std/utf.d b/std/utf.d
index 7ad2cdb5524..958d0666a2d 100644
--- a/std/utf.d
+++ b/std/utf.d
@@ -2468,7 +2468,7 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
  * See_Also:
  *     For a lazy, non-allocating version of these functions, see $(LREF byUTF).
  */
-string toUTF8(S)(S s) if (isSomeString!S)
+string toUTF8(S)(S s) if (isInputRange!S && isSomeChar!(ElementEncodingType!S))
 {
     static if (is(S : string))
     {
@@ -2478,9 +2478,13 @@ string toUTF8(S)(S s) if (isSomeString!S)
     {
         import std.array : appender;
         auto app = appender!string();
-        app.reserve(s.length);
+
+        static if (hasLength!S || isSomeString!S)
+            app.reserve(s.length);
+
         foreach (c; s.byUTF2!char)
             app.put(c);
+
         return app.data;
     }
 }

From e096f29e186100faf187218acca9466f388a93c2 Mon Sep 17 00:00:00 2001
From: Jack Stouffer <jack@jackstouffer.com>
Date: Sat, 23 Jul 2016 21:11:21 -0400
Subject: [PATCH 6/6] Improved docs and added tests

---
 std/utf.d | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/std/utf.d b/std/utf.d
index 958d0666a2d..34279aec95e 100644
--- a/std/utf.d
+++ b/std/utf.d
@@ -2459,7 +2459,8 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
 }
 
 /**
- * Encodes string `s` into UTF-8 and returns the encoded string.
+ * Encodes the elements of `s` to UTF-8 and returns a newly allocated
+ * string of the elements.
  *
  * Params:
  *     s = the string to encode
@@ -2482,7 +2483,7 @@ string toUTF8(S)(S s) if (isInputRange!S && isSomeChar!(ElementEncodingType!S))
         static if (hasLength!S || isSomeString!S)
             app.reserve(s.length);
 
-        foreach (c; s.byUTF2!char)
+        foreach (c; s.byUTF!char)
             app.put(c);
 
         return app.data;
@@ -2501,6 +2502,18 @@ string toUTF8(S)(S s) if (isInputRange!S && isSomeChar!(ElementEncodingType!S))
     assert("𐐷"d.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7]));
 }
 
+@system pure unittest
+{
+    import std.internal.test.dummyrange : ReferenceInputRange;
+    import std.algorithm.comparison : equal;
+
+    auto r1 = new ReferenceInputRange!dchar("Hellø");
+    auto r2 = new ReferenceInputRange!dchar("𐐷");
+
+    assert(r1.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8]));
+    assert(r2.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7]));
+}
+
 
 /* =================== Conversion to UTF16 ======================= */