diff --git a/src/Common/src/CoreLib/System/Text/ValueStringBuilder.cs b/src/Common/src/CoreLib/System/Text/ValueStringBuilder.cs index d41bea0be9d7..1e33bdb49b4d 100644 --- a/src/Common/src/CoreLib/System/Text/ValueStringBuilder.cs +++ b/src/Common/src/CoreLib/System/Text/ValueStringBuilder.cs @@ -22,6 +22,13 @@ public ValueStringBuilder(Span initialBuffer) _pos = 0; } + public ValueStringBuilder(int initialCapacity) + { + _arrayToReturnToPool = ArrayPool.Shared.Rent(initialCapacity); + _chars = _arrayToReturnToPool; + _pos = 0; + } + public int Length { get => _pos; diff --git a/src/Common/tests/Tests/System/Text/ValueStringBuilderTests.cs b/src/Common/tests/Tests/System/Text/ValueStringBuilderTests.cs index e5155e540a01..a6205b21ba37 100644 --- a/src/Common/tests/Tests/System/Text/ValueStringBuilderTests.cs +++ b/src/Common/tests/Tests/System/Text/ValueStringBuilderTests.cs @@ -30,6 +30,17 @@ public void Ctor_Span_CanAppend() Assert.Equal("a", vsb.ToString()); } + [Fact] + public void Ctor_InitialCapacity_CanAppend() + { + var vsb = new ValueStringBuilder(1); + Assert.Equal(0, vsb.Length); + + vsb.Append('a'); + Assert.Equal(1, vsb.Length); + Assert.Equal("a", vsb.ToString()); + } + [Fact] public void Append_Char_MatchesStringBuilder() { @@ -152,6 +163,26 @@ public void Insert_IntCharInt_MatchesStringBuilder() Assert.Equal(sb.ToString(), vsb.ToString()); } + [Fact] + public void AsSpan_ReturnsCorrectValue_DoesntClearBuilder() + { + var sb = new StringBuilder(); + var vsb = new ValueStringBuilder(); + + for (int i = 1; i <= 100; i++) + { + string s = i.ToString(); + sb.Append(s); + vsb.Append(s); + } + + var resultString = new string(vsb.AsSpan()); + Assert.Equal(sb.ToString(), resultString); + + Assert.NotEqual(0, sb.Length); + Assert.Equal(sb.Length, vsb.Length); + } + [Fact] public void ToString_ClearsBuilder_ThenReusable() { @@ -213,6 +244,27 @@ public void TryCopyTo_ClearsBuilder_ThenReusable() Assert.Equal(Text2, vsb.ToString()); } + [Fact] + public void Dispose_ClearsBuilder_ThenReusable() + { + const string Text1 = "test"; + var vsb = new ValueStringBuilder(); + + vsb.Append(Text1); + Assert.Equal(Text1.Length, vsb.Length); + + vsb.Dispose(); + + Assert.Equal(0, vsb.Length); + Assert.Equal(string.Empty, vsb.ToString()); + Assert.True(vsb.TryCopyTo(Span.Empty, out _)); + + const string Text2 = "another test"; + vsb.Append(Text2); + Assert.Equal(Text2.Length, vsb.Length); + Assert.Equal(Text2, vsb.ToString()); + } + [Fact] public unsafe void Indexer() { diff --git a/src/System.Runtime.Extensions/src/System.Runtime.Extensions.csproj b/src/System.Runtime.Extensions/src/System.Runtime.Extensions.csproj index 539b86b128f2..ba595f9c54a1 100644 --- a/src/System.Runtime.Extensions/src/System.Runtime.Extensions.csproj +++ b/src/System.Runtime.Extensions/src/System.Runtime.Extensions.csproj @@ -67,9 +67,6 @@ Common\CoreLib\System\Threading\Tasks\TaskToApm.cs - - Common\System\Text\StringBuilderCache.cs - Common\System\HResults.cs diff --git a/src/System.Runtime.Extensions/src/System/Net/WebUtility.cs b/src/System.Runtime.Extensions/src/System/Net/WebUtility.cs index 1c0c0570fac9..766907468fe6 100644 --- a/src/System.Runtime.Extensions/src/System/Net/WebUtility.cs +++ b/src/System.Runtime.Extensions/src/System/Net/WebUtility.cs @@ -5,6 +5,8 @@ // Don't entity encode high chars (160 to 256) #define ENTITY_ENCODE_HIGH_ASCII_CHARS +using System; +using System.Buffers; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; @@ -26,11 +28,10 @@ public static class WebUtility private const int UNICODE_PLANE16_END = 0x10FFFF; private const int UnicodeReplacementChar = '\uFFFD'; + private const int MaxInt32Digits = 10; #region HtmlEncode / HtmlDecode methods - private static readonly char[] s_htmlEntityEndingChars = new char[] { ';', '&' }; - public static string HtmlEncode(string value) { if (string.IsNullOrEmpty(value)) @@ -38,104 +39,146 @@ public static string HtmlEncode(string value) return value; } - // Don't create StringBuilder if we don't have anything to encode - int index = IndexOfHtmlEncodingChars(value, 0); + ReadOnlySpan valueSpan = value.AsSpan(); + + // Don't create ValueStringBuilder if we don't have anything to encode + int index = IndexOfHtmlEncodingChars(valueSpan); if (index == -1) { return value; } - StringBuilder sb = StringBuilderCache.Acquire(value.Length); - HtmlEncode(value, index, sb); - return StringBuilderCache.GetStringAndRelease(sb); + // For small inputs we allocate on the stack. In most cases a buffer three + // times larger the original string should be sufficient as usually not all + // characters need to be encoded. + // For larger string we rent the input string's length plus a fixed + // conservative amount of chars from the ArrayPool. + Span buffer = value.Length < 80 ? + stackalloc char[256] : + null; + ValueStringBuilder sb = buffer != null ? + new ValueStringBuilder(buffer) : + new ValueStringBuilder(value.Length + 200); + + sb.Append(valueSpan.Slice(0, index)); + HtmlEncode(valueSpan.Slice(index), ref sb); + + return sb.ToString(); } public static void HtmlEncode(string value, TextWriter output) { - output.Write(HtmlEncode(value)); + if (output == null) + { + throw new ArgumentNullException(nameof(output)); + } + if (string.IsNullOrEmpty(value)) + { + output.Write(value); + return; + } + + ReadOnlySpan valueSpan = value.AsSpan(); + + // Don't create ValueStringBuilder if we don't have anything to encode + int index = IndexOfHtmlEncodingChars(valueSpan); + if (index == -1) + { + output.Write(value); + return; + } + + // For small inputs we allocate on the stack. In most cases a buffer three + // times larger the original string should be sufficient as usually not all + // characters need to be encoded. + // For larger string we rent the input string's length plus a fixed + // conservative amount of chars from the ArrayPool. + Span buffer = value.Length < 80 ? + stackalloc char[256] : + null; + ValueStringBuilder sb = buffer != null ? + new ValueStringBuilder(buffer) : + new ValueStringBuilder(value.Length + 200); + + sb.Append(valueSpan.Slice(0, index)); + HtmlEncode(valueSpan.Slice(index), ref sb); + + output.Write(sb.AsSpan()); + sb.Dispose(); } - private static unsafe void HtmlEncode(string value, int index, StringBuilder output) + private static void HtmlEncode(ReadOnlySpan input, ref ValueStringBuilder output) { - Debug.Assert(value != null); - Debug.Assert(output != null); - Debug.Assert(0 <= index && index <= value.Length, "0 <= index && index <= value.Length"); - - int cch = value.Length - index; - fixed (char* str = value) + for (int i = 0; i < input.Length; i++) { - char* pch = str; - while (index-- > 0) + char ch = input[i]; + if (ch <= '>') { - output.Append(*pch++); + switch (ch) + { + case '<': + output.Append("<"); + break; + case '>': + output.Append(">"); + break; + case '"': + output.Append("""); + break; + case '\'': + output.Append("'"); + break; + case '&': + output.Append("&"); + break; + default: + output.Append(ch); + break; + } } - - for (; cch > 0; cch--, pch++) + else { - char ch = *pch; - if (ch <= '>') + int valueToEncode = -1; // set to >= 0 if needs to be encoded + +#if ENTITY_ENCODE_HIGH_ASCII_CHARS + if (ch >= 160 && ch < 256) { - switch (ch) - { - case '<': - output.Append("<"); - break; - case '>': - output.Append(">"); - break; - case '"': - output.Append("""); - break; - case '\'': - output.Append("'"); - break; - case '&': - output.Append("&"); - break; - default: - output.Append(ch); - break; - } + // The seemingly arbitrary 160 comes from RFC + valueToEncode = ch; } else +#endif // ENTITY_ENCODE_HIGH_ASCII_CHARS + if (char.IsSurrogate(ch)) { - int valueToEncode = -1; // set to >= 0 if needs to be encoded - -#if ENTITY_ENCODE_HIGH_ASCII_CHARS - if (ch >= 160 && ch < 256) + int scalarValue = GetNextUnicodeScalarValueFromUtf16Surrogate(input, ref i); + if (scalarValue >= UNICODE_PLANE01_START) { - // The seemingly arbitrary 160 comes from RFC - valueToEncode = ch; + valueToEncode = scalarValue; } else -#endif // ENTITY_ENCODE_HIGH_ASCII_CHARS - if (Char.IsSurrogate(ch)) { - int scalarValue = GetNextUnicodeScalarValueFromUtf16Surrogate(ref pch, ref cch); - if (scalarValue >= UNICODE_PLANE01_START) - { - valueToEncode = scalarValue; - } - else - { - // Don't encode BMP characters (like U+FFFD) since they wouldn't have - // been encoded if explicitly present in the string anyway. - ch = (char)scalarValue; - } + // Don't encode BMP characters (like U+FFFD) since they wouldn't have + // been encoded if explicitly present in the string anyway. + ch = (char)scalarValue; } + } - if (valueToEncode >= 0) - { - // value needs to be encoded - output.Append("&#"); - output.Append(valueToEncode.ToString(CultureInfo.InvariantCulture)); - output.Append(';'); - } - else - { - // write out the character directly - output.Append(ch); - } + if (valueToEncode >= 0) + { + // value needs to be encoded + output.Append("&#"); + + // Use the buffer directly and reserve a conservative estimate of 10 chars. + Span encodingBuffer = output.AppendSpan(MaxInt32Digits); + valueToEncode.TryFormat(encodingBuffer, out int charsWritten); // Invariant + output.Length -= (MaxInt32Digits - charsWritten); + + output.Append(';'); + } + else + { + // write out the character directly + output.Append(ch); } } } @@ -148,60 +191,93 @@ public static string HtmlDecode(string value) return value; } - // Don't create StringBuilder if we don't have anything to encode - if (!StringRequiresHtmlDecoding(value)) + ReadOnlySpan valueSpan = value.AsSpan(); + + int index = IndexOfHtmlDecodingChars(valueSpan); + if (index == -1) { return value; } - StringBuilder sb = StringBuilderCache.Acquire(value.Length); - HtmlDecode(value, sb); - return StringBuilderCache.GetStringAndRelease(sb); + // In the worst case the decoded string has the same length. + // For small inputs we use stack allocation. + Span buffer = value.Length <= 256 ? + stackalloc char[256] : + null; + ValueStringBuilder sb = buffer != null ? + new ValueStringBuilder(buffer) : + new ValueStringBuilder(value.Length); + + sb.Append(valueSpan.Slice(0, index)); + HtmlDecode(valueSpan.Slice(index), ref sb); + + return sb.ToString(); } public static void HtmlDecode(string value, TextWriter output) { - output.Write(HtmlDecode(value)); + if (output == null) + { + throw new ArgumentNullException(nameof(output)); + } + + if (string.IsNullOrEmpty(value)) + { + output.Write(value); + return; + } + + ReadOnlySpan valueSpan = value.AsSpan(); + + int index = IndexOfHtmlDecodingChars(valueSpan); + if (index == -1) + { + output.Write(value); + return; + } + + // In the worst case the decoded string has the same length. + // For small inputs we use stack allocation. + Span buffer = value.Length <= 256 ? + stackalloc char[256] : + null; + ValueStringBuilder sb = buffer != null ? + new ValueStringBuilder(buffer) : + new ValueStringBuilder(value.Length); + + sb.Append(valueSpan.Slice(0, index)); + HtmlDecode(valueSpan.Slice(index), ref sb); + + output.Write(sb.AsSpan()); + sb.Dispose(); } - [SuppressMessage("Microsoft.Usage", "CA1806:DoNotIgnoreMethodResults", MessageId = "System.UInt16.TryParse(System.String,System.Globalization.NumberStyles,System.IFormatProvider,System.UInt16@)", Justification = "UInt16.TryParse guarantees that result is zero if the parse fails.")] - private static void HtmlDecode(string value, StringBuilder output) + private static void HtmlDecode(ReadOnlySpan input, ref ValueStringBuilder output) { - Debug.Assert(output != null); - - int l = value.Length; - for (int i = 0; i < l; i++) + for (int i = 0; i < input.Length; i++) { - char ch = value[i]; + char ch = input[i]; if (ch == '&') { // We found a '&'. Now look for the next ';' or '&'. The idea is that // if we find another '&' before finding a ';', then this is not an entity, // and the next '&' might start a real entity (VSWhidbey 275184) - int index = value.IndexOfAny(s_htmlEntityEndingChars, i + 1); - if (index > 0 && value[index] == ';') + ReadOnlySpan inputSlice = input.Slice(i + 1); + int entityLength = inputSlice.IndexOf(';'); + if (entityLength >= 0) { - int entityOffset = i + 1; - int entityLength = index - entityOffset; - - if (entityLength > 1 && value[entityOffset] == '#') + int entityEndPosition = (i + 1) + entityLength; + if (entityLength > 1 && inputSlice[0] == '#') { // The # syntax can be in decimal or hex, e.g. // å --> decimal // å --> same char in hex // See http://www.w3.org/TR/REC-html40/charset.html#entities - bool parsedSuccessfully; - uint parsedValue; - if (value[entityOffset + 1] == 'x' || value[entityOffset + 1] == 'X') - { - parsedSuccessfully = uint.TryParse(value.AsSpan(entityOffset + 2, entityLength - 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out parsedValue); - } - else - { - parsedSuccessfully = uint.TryParse(value.AsSpan(entityOffset + 1, entityLength - 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out parsedValue); - } + bool parsedSuccessfully = inputSlice[1] == 'x' || inputSlice[1] == 'X' + ? uint.TryParse(inputSlice.Slice(2, entityLength - 2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out uint parsedValue) + : uint.TryParse(inputSlice.Slice(1, entityLength - 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out parsedValue); if (parsedSuccessfully) { @@ -219,22 +295,21 @@ private static void HtmlDecode(string value, StringBuilder output) else { // multi-character - char leadingSurrogate, trailingSurrogate; - ConvertSmpToUtf16(parsedValue, out leadingSurrogate, out trailingSurrogate); + ConvertSmpToUtf16(parsedValue, out char leadingSurrogate, out char trailingSurrogate); output.Append(leadingSurrogate); output.Append(trailingSurrogate); } - i = index; // already looked at everything until semicolon + i = entityEndPosition; // already looked at everything until semicolon continue; } } else { - ReadOnlySpan entity = value.AsSpan(entityOffset, entityLength); - i = index; // already looked at everything until semicolon - + ReadOnlySpan entity = inputSlice.Slice(0, entityLength); + i = entityEndPosition; // already looked at everything until semicolon char entityChar = HtmlEntities.Lookup(entity); + if (entityChar != (char)0) { ch = entityChar; @@ -254,38 +329,32 @@ private static void HtmlDecode(string value, StringBuilder output) } } - private static unsafe int IndexOfHtmlEncodingChars(string s, int startPos) + private static int IndexOfHtmlEncodingChars(ReadOnlySpan input) { - Debug.Assert(0 <= startPos && startPos <= s.Length, "0 <= startPos && startPos <= s.Length"); - - int cch = s.Length - startPos; - fixed (char* str = s) + for (int i = 0; i < input.Length; i++) { - for (char* pch = &str[startPos]; cch > 0; pch++, cch--) + char ch = input[i]; + if (ch <= '>') { - char ch = *pch; - if (ch <= '>') + switch (ch) { - switch (ch) - { - case '<': - case '>': - case '"': - case '\'': - case '&': - return s.Length - cch; - } + case '<': + case '>': + case '"': + case '\'': + case '&': + return i; } + } #if ENTITY_ENCODE_HIGH_ASCII_CHARS - else if (ch >= 160 && ch < 256) - { - return s.Length - cch; - } + else if (ch >= 160 && ch < 256) + { + return i; + } #endif // ENTITY_ENCODE_HIGH_ASCII_CHARS - else if (Char.IsSurrogate(ch)) - { - return s.Length - cch; - } + else if (char.IsSurrogate(ch)) + { + return i; } } @@ -295,7 +364,7 @@ private static unsafe int IndexOfHtmlEncodingChars(string s, int startPos) #endregion #region UrlEncode implementation - + private static void GetEncodedBytes(byte[] originalBytes, int offset, int count, byte[] expandedBytes) { int pos = 0; @@ -331,9 +400,9 @@ private static void GetEncodedBytes(byte[] originalBytes, int offset, int count, } } -#endregion + #endregion -#region UrlEncode public methods + #region UrlEncode public methods [SuppressMessage("Microsoft.Design", "CA1055:UriReturnValuesShouldNotBeStrings", Justification = "Already shipped public API; code moved here as part of API consolidation")] public static string UrlEncode(string value) @@ -382,7 +451,7 @@ public static string UrlEncode(string value) // beginning of the array. byte[] newBytes = new byte[byteCount + byteIndex]; Encoding.UTF8.GetBytes(value, 0, value.Length, newBytes, byteIndex); - + GetEncodedBytes(newBytes, byteIndex, byteCount, newBytes); return Encoding.UTF8.GetString(newBytes); } @@ -422,9 +491,9 @@ public static byte[] UrlEncodeToBytes(byte[] value, int offset, int count) return expandedBytes; } -#endregion + #endregion -#region UrlDecode implementation + #region UrlDecode implementation private static string UrlDecodeInternal(string value, Encoding encoding) { @@ -472,7 +541,7 @@ private static string UrlDecodeInternal(string value, Encoding encoding) else helper.AddChar(ch); } - + if (!needsDecodingUnsafe) { if (needsDecodingSpaces) @@ -530,9 +599,9 @@ private static byte[] UrlDecodeInternal(byte[] bytes, int offset, int count) return decodedBytes; } -#endregion + #endregion -#region UrlDecode public methods + #region UrlDecode public methods [SuppressMessage("Microsoft.Design", "CA1055:UriReturnValuesShouldNotBeStrings", Justification = "Already shipped public API; code moved here as part of API consolidation")] @@ -546,9 +615,9 @@ public static byte[] UrlDecodeToBytes(byte[] encodedValue, int offset, int count return UrlDecodeInternal(encodedValue, offset, count); } -#endregion + #endregion -#region Helper methods + #region Helper methods // similar to Char.ConvertFromUtf32, but doesn't check arguments or generate strings // input is assumed to be an SMP character @@ -561,35 +630,32 @@ private static void ConvertSmpToUtf16(uint smpChar, out char leadingSurrogate, o trailingSurrogate = (char)((utf32 % 0x400) + LOW_SURROGATE_START); } - private static unsafe int GetNextUnicodeScalarValueFromUtf16Surrogate(ref char* pch, ref int charsRemaining) + private static int GetNextUnicodeScalarValueFromUtf16Surrogate(ReadOnlySpan input, ref int index) { // invariants - Debug.Assert(charsRemaining >= 1); - Debug.Assert(Char.IsSurrogate(*pch)); + Debug.Assert(input.Length >= 1); + Debug.Assert(char.IsSurrogate(input[0])); - if (charsRemaining <= 1) + if (input.Length <= 1) { // not enough characters remaining to resurrect the original scalar value return UnicodeReplacementChar; } - char leadingSurrogate = pch[0]; - char trailingSurrogate = pch[1]; + char leadingSurrogate = input[0]; + char trailingSurrogate = input[1]; - if (Char.IsSurrogatePair(leadingSurrogate, trailingSurrogate)) - { - // we're going to consume an extra char - pch++; - charsRemaining--; - - // below code is from Char.ConvertToUtf32, but without the checks (since we just performed them) - return (((leadingSurrogate - HIGH_SURROGATE_START) * 0x400) + (trailingSurrogate - LOW_SURROGATE_START) + UNICODE_PLANE01_START); - } - else + if (!char.IsSurrogatePair(leadingSurrogate, trailingSurrogate)) { // unmatched surrogate return UnicodeReplacementChar; } + + // we're going to consume an extra char + index++; + + // below code is from Char.ConvertToUtf32, but without the checks (since we just performed them) + return (((leadingSurrogate - HIGH_SURROGATE_START) * 0x400) + (trailingSurrogate - LOW_SURROGATE_START) + UNICODE_PLANE01_START); } private static int HexToInt(char h) @@ -673,21 +739,22 @@ private static bool ValidateUrlEncodingParameters(byte[] bytes, int offset, int return true; } - private static bool StringRequiresHtmlDecoding(string s) + private static int IndexOfHtmlDecodingChars(ReadOnlySpan input) { // this string requires html decoding if it contains '&' or a surrogate character - for (int i = 0; i < s.Length; i++) + for (int i = 0; i < input.Length; i++) { - char c = s[i]; - if (c == '&' || Char.IsSurrogate(c)) + char c = input[i]; + if (c == '&' || char.IsSurrogate(c)) { - return true; + return i; } } - return false; + + return -1; } -#endregion + #endregion // Internal struct to facilitate URL decoding -- keeps char buffer and byte buffer, allows appending of either chars or bytes private struct UrlDecoder @@ -1044,7 +1111,7 @@ public static char Lookup(ReadOnlySpan entity) { // Currently, there are no entities that are longer than 8 characters. return (char)0; - } + } } private static ulong ToUInt64Key(ReadOnlySpan entity)