diff --git a/std/utf.d b/std/utf.d index 84b25dbda9a..34279aec95e 100644 --- a/std/utf.d +++ b/std/utf.d @@ -2418,11 +2418,7 @@ void validate(S)(in S str) @safe pure } /* =================== Conversion to UTF8 ======================= */ - -pure -{ - -char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe +char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure { if (c <= 0x7F) { @@ -2462,73 +2458,66 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe } } -/******************* - * Encodes string $(D_PARAM s) into UTF-8 and returns the encoded string. +/** + * Encodes the elements of `s` to UTF-8 and returns a newly allocated + * string of the elements. + * + * Params: + * s = the string or input range of characters to encode + * Returns: + * A UTF-8 string + * See_Also: + * For a lazy, non-allocating version of these functions, see $(LREF byUTF). */ -string toUTF8(scope const char[] s) @safe +string toUTF8(S)(S s) if (isInputRange!S && isSomeChar!(ElementEncodingType!S)) { - validate(s); - return s.idup; -} + static if (is(S : string)) + { + return s.idup; + } + else + { + import std.array : appender; + auto app = appender!string(); -/// ditto -string toUTF8(scope const wchar[] s) @safe -{ - char[] r; - size_t i; - immutable slen = s.length; + static if (hasLength!S || isSomeString!S) + app.reserve(s.length); - r.length = slen; - for (i = 0; i < slen; i++) - { - immutable c = s[i]; + foreach (c; s.byUTF!char) + app.put(c); - if (c <= 0x7F) - r[i] = cast(char)c; // fast path for ascii - else - { - r.length = i; - while (i < slen) - encode(r, decode(s, i)); - break; - } + return app.data; } - - return r; } -/// ditto -string toUTF8(scope const dchar[] s) @safe +/// +@safe pure unittest { - char[] r; - size_t i; - immutable slen = s.length; + import std.algorithm.comparison : equal; - r.length = slen; - for (i = 0; i < slen; i++) - { - immutable c = s[i]; + // The ø is represented by two UTF-8 code units + assert("Hellø"w.toUTF8.equal(['H', 'e', 'l', 
'l', 0xC3, 0xB8])); - if (c <= 0x7F) - r[i] = cast(char)c; // fast path for ascii - else - { - r.length = i; - foreach (dchar d; s[i .. slen]) - { - encode(r, d); - } - break; - } - } + // 𐐷 is four code units in UTF-8 + assert("𐐷"d.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7])); +} - return r; +@system pure unittest +{ + import std.internal.test.dummyrange : ReferenceInputRange; + import std.algorithm.comparison : equal; + + auto r1 = new ReferenceInputRange!dchar("Hellø"); + auto r2 = new ReferenceInputRange!dchar("𐐷"); + + assert(r1.toUTF8.equal(['H', 'e', 'l', 'l', 0xC3, 0xB8])); + assert(r2.toUTF8.equal([0xF0, 0x90, 0x90, 0xB7])); } /* =================== Conversion to UTF16 ======================= */ -wchar[] toUTF16(return ref wchar[2] buf, dchar c) nothrow @nogc @safe +wchar[] toUTF16(return ref wchar[2] buf, dchar c) nothrow @nogc @safe pure in { assert(isValidDchar(c)); @@ -2551,7 +2540,7 @@ body /**************** * Encodes string $(D s) into UTF-16 and returns the encoded string. */ -wstring toUTF16(scope const char[] s) @safe +wstring toUTF16(scope const char[] s) @safe pure { wchar[] r; immutable slen = s.length; @@ -2577,14 +2566,14 @@ wstring toUTF16(scope const char[] s) @safe } /// ditto -wstring toUTF16(scope const wchar[] s) @safe +wstring toUTF16(scope const wchar[] s) @safe pure { validate(s); return s.idup; } /// ditto -wstring toUTF16(scope const dchar[] s) @safe +wstring toUTF16(scope const dchar[] s) @safe pure { wchar[] r; immutable slen = s.length; @@ -2605,7 +2594,7 @@ wstring toUTF16(scope const dchar[] s) @safe /***** * Encodes string $(D_PARAM s) into UTF-32 and returns the encoded string. 
*/ -dstring toUTF32(scope const char[] s) @safe +dstring toUTF32(scope const char[] s) @safe pure { dchar[] r; immutable slen = s.length; @@ -2626,7 +2615,7 @@ dstring toUTF32(scope const char[] s) @safe } /// ditto -dstring toUTF32(scope const wchar[] s) @safe +dstring toUTF32(scope const wchar[] s) @safe pure { dchar[] r; immutable slen = s.length; @@ -2647,15 +2636,12 @@ dstring toUTF32(scope const wchar[] s) @safe } /// ditto -dstring toUTF32(scope const dchar[] s) @safe +dstring toUTF32(scope const dchar[] s) @safe pure { validate(s); return s.idup; } -} // Convert functions are @safe - - /* =================== toUTFz ======================= */ /++