Skip to content

Commit dda29ff

Browse files
authored
Fix and optimize EscapeUnescapeIri (#32025)
* Remove byte[] allocation per encoded character
* Remove dead code from EscapeUnescapeIri
* Use int instead of IntPtr for stack buffer
* Use sizeof(int) instead of 4 as const
* Fix EscapeUnescapeIri for escaped surrogate pairs
1 parent 7c62303 commit dda29ff

File tree

3 files changed

+71
-40
lines changed

3 files changed

+71
-40
lines changed

src/libraries/System.Private.Uri/src/System/IriHelper.cs

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -109,19 +109,11 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
109109
ValueStringBuilder dest = new ValueStringBuilder(size);
110110
byte[]? bytes = null;
111111

112-
const int percentEncodingLen = 3; // Escaped UTF-8 will take 3 chars: %AB.
113-
int bufferRemaining = 0;
114-
115112
int next = start;
116113
char ch;
117-
bool escape = false;
118-
bool surrogatePair = false;
119114

120115
for (; next < end; ++next)
121116
{
122-
escape = false;
123-
surrogatePair = false;
124-
125117
if ((ch = pInput[next]) == '%')
126118
{
127119
if (next + 2 < end)
@@ -226,56 +218,61 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
226218
{
227219
// unicode
228220

229-
char ch2;
221+
bool escape;
222+
bool surrogatePair = false;
223+
224+
char ch2 = '\0';
230225

231226
if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
232227
{
233228
ch2 = pInput[next + 1];
234229
escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
235-
if (!escape)
236-
{
237-
// copy the two chars
238-
dest.Append(pInput[next++]);
239-
dest.Append(pInput[next]);
240-
}
241230
}
242231
else
243232
{
244-
if (CheckIriUnicodeRange(ch, component == UriComponents.Query))
233+
escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query);
234+
}
235+
236+
if (escape)
237+
{
238+
Span<byte> encodedBytes = stackalloc byte[4];
239+
240+
Rune rune;
241+
if (surrogatePair)
245242
{
246-
// copy it
247-
dest.Append(pInput[next]);
243+
rune = new Rune(ch, ch2);
248244
}
249-
else
245+
else if (!Rune.TryCreate(ch, out rune))
250246
{
251-
// escape it
252-
escape = true;
247+
rune = Rune.ReplacementChar;
253248
}
254-
}
255-
}
256-
else
257-
{
258-
// just copy the character
259-
dest.Append(pInput[next]);
260-
}
261249

262-
if (escape)
263-
{
264-
const int MaxNumberOfBytesEncoded = 4;
250+
int bytesWritten = rune.EncodeToUtf8(encodedBytes);
251+
encodedBytes = encodedBytes.Slice(0, bytesWritten);
265252

266-
byte[] encodedBytes = new byte[MaxNumberOfBytesEncoded];
267-
fixed (byte* pEncodedBytes = &encodedBytes[0])
253+
foreach (byte b in encodedBytes)
254+
{
255+
UriHelper.EscapeAsciiChar(b, ref dest);
256+
}
257+
}
258+
else
268259
{
269-
int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, MaxNumberOfBytesEncoded);
270-
Debug.Assert(encodedBytesCount <= MaxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount");
271-
272-
bufferRemaining -= encodedBytesCount * percentEncodingLen;
273-
274-
for (int count = 0; count < encodedBytesCount; ++count)
260+
dest.Append(ch);
261+
if (surrogatePair)
275262
{
276-
UriHelper.EscapeAsciiChar(encodedBytes[count], ref dest);
263+
dest.Append(ch2);
277264
}
278265
}
266+
267+
if (surrogatePair)
268+
{
269+
next++;
270+
}
271+
}
272+
else
273+
{
274+
// just copy the character
275+
dest.Append(pInput[next]);
279276
}
280277
}
281278

src/libraries/System.Private.Uri/tests/FunctionalTests/EscapeUnescapeIriTests.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
using System.Collections.Generic;
2+
using Xunit;
3+
4+
namespace System.PrivateUri.Tests
5+
{
6+
// Checks how Uri.AbsoluteUri renders non-ASCII input that contains UTF-16 surrogates,
// both correctly paired and standalone, when it follows a "scheme:" prefix.
public class EscapeUnescapeIriTests
7+
{
8+
// Theory data: each entry is { input, expected }, where "expected" is the text that
// AbsoluteUri is expected to contain after the "scheme:" prefix.
// NOTE: this parameterless method deliberately shares its name with the two-parameter
// theory below; [MemberData(nameof(...))] refers to it by that shared name.
public static IEnumerable<object[]> ReplacesStandaloneSurrogatesWithReplacementChar()
9+
{
10+
// Percent-encoded UTF-8 bytes (EF BF BD) of U+FFFD REPLACEMENT CHARACTER.
const string UrlEncodedReplacementChar = "%EF%BF%BD";
11+
// A high (leading) surrogate code unit.
const string HighSurrogate = "\ud83f";
12+
// A low (trailing) surrogate code unit; HighSurrogate + LowSurrogate together form U+1FFFE.
const string LowSurrogate = "\udffe";
13+
14+
// Plain ASCII is expected to pass through unchanged.
yield return new object[] { "a", "a" };
15+
// A well-formed pair is expected to be percent-encoded as its single 4-byte UTF-8 sequence.
yield return new object[] { HighSurrogate + LowSurrogate, "%F0%9F%BF%BE" };
16+
// A surrogate without a partner is expected to be emitted as one encoded replacement character.
yield return new object[] { HighSurrogate, UrlEncodedReplacementChar };
17+
yield return new object[] { LowSurrogate, UrlEncodedReplacementChar };
18+
// Two surrogates that do not form a valid high+low pair are expected to be replaced
// independently, yielding two encoded replacement characters.
yield return new object[] { LowSurrogate + HighSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar };
19+
yield return new object[] { LowSurrogate + LowSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar };
20+
yield return new object[] { HighSurrogate + HighSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar };
21+
}
22+
23+
// Builds a Uri from "scheme:" + input and asserts that the remainder of AbsoluteUri
// (everything after the prefix) equals the expected string.
[Theory]
24+
[MemberData(nameof(ReplacesStandaloneSurrogatesWithReplacementChar))]
25+
public static void ReplacesStandaloneSurrogatesWithReplacementChar(string input, string expected)
26+
{
27+
const string Prefix = "scheme:";
28+
Uri uri = new Uri(Prefix + input);
29+
// Strip the fixed prefix so that only the part derived from the input is compared.
string actual = uri.AbsoluteUri.Substring(Prefix.Length);
30+
Assert.Equal(expected, actual);
31+
}
32+
}
33+
}

src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
</PropertyGroup>
55
<ItemGroup>
66
<Compile Include="AppxUriValue.cs" />
7+
<Compile Include="EscapeUnescapeIriTests.cs" />
78
<Compile Include="IdnCheckHostNameTest.cs" />
89
<Compile Include="IdnDnsSafeHostTest.cs" />
910
<Compile Include="IdnHostNameValidationTest.cs" />

0 commit comments

Comments
 (0)