-
Notifications
You must be signed in to change notification settings - Fork 515
Less StringValue struct copies + bounds check elimination #2347
Changes from all commits
8a86e0f
776cc99
8e1a024
d9f74b3
301c8f9
b932a4f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |
| using System.Diagnostics; | ||
| using System.Globalization; | ||
| using System.IO.Pipelines; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Runtime.InteropServices; | ||
| using System.Text; | ||
| using System.Threading.Tasks; | ||
|
|
@@ -360,23 +361,38 @@ internal void EnsureHostHeaderExists() | |
| // request message that contains more than one Host header field or a | ||
| // Host header field with an invalid field-value. | ||
|
|
||
| var host = HttpRequestHeaders.HeaderHost; | ||
| var hostText = host.ToString(); | ||
| if (host.Count <= 0) | ||
| var hostCount = HttpRequestHeaders.HostCount; | ||
| var hostText = HttpRequestHeaders.HeaderHost.ToString(); | ||
| if (hostCount <= 0) | ||
| { | ||
| if (_httpVersion == Http.HttpVersion.Http10) | ||
| { | ||
| return; | ||
| } | ||
| BadHttpRequestException.Throw(RequestRejectionReason.MissingHostHeader); | ||
| } | ||
| else if (host.Count > 1) | ||
| else if (hostCount > 1) | ||
| { | ||
| BadHttpRequestException.Throw(RequestRejectionReason.MultipleHostHeaders); | ||
| } | ||
| else if (_requestTargetForm == HttpRequestTarget.AuthorityForm) | ||
| else if (_requestTargetForm != HttpRequestTarget.OriginForm) | ||
| { | ||
| if (!host.Equals(RawTarget)) | ||
| // Tail call | ||
| ValidateNonOrginHostHeader(hostText); | ||
| } | ||
| else | ||
| { | ||
| // Tail call | ||
| HttpUtilities.ValidateHostHeader(hostText); | ||
| } | ||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.NoInlining)] | ||
| private void ValidateNonOrginHostHeader(string hostText) | ||
| { | ||
| if (_requestTargetForm == HttpRequestTarget.AuthorityForm) | ||
| { | ||
| if (hostText != RawTarget) | ||
| { | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
|
|
@@ -390,20 +406,18 @@ internal void EnsureHostHeaderExists() | |
|
|
||
| // System.Uri doesn't not tell us if the port was in the original string or not. | ||
| // When IsDefaultPort = true, we will allow Host: with or without the default port | ||
| if (host != _absoluteRequestTarget.Authority) | ||
| if (hostText != _absoluteRequestTarget.Authority) | ||
| { | ||
| if (!_absoluteRequestTarget.IsDefaultPort | ||
| || host != _absoluteRequestTarget.Authority + ":" + _absoluteRequestTarget.Port.ToString(CultureInfo.InvariantCulture)) | ||
| || hostText != _absoluteRequestTarget.Authority + ":" + _absoluteRequestTarget.Port.ToString(CultureInfo.InvariantCulture)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Tratcher This has nothing to do with this PR, but if the problem here is "System.Uri doesn't not tell us if the port was in the original string or not", why not just compare hostText to RawTarget instead of doing this complicated condition?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. HostText isn't expected to be the full value of RawTarget, only part of it. |
||
| { | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if (!HttpUtilities.IsValidHostHeader(hostText)) | ||
| { | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
| // Tail call | ||
| HttpUtilities.ValidateHostHeader(hostText); | ||
| } | ||
|
|
||
| protected override void OnReset() | ||
|
|
@@ -454,8 +468,7 @@ protected override bool TryParseRequest(ReadResult result, out bool endConnectio | |
| { | ||
| if (_requestProcessingStatus == RequestProcessingStatus.ParsingHeaders) | ||
| { | ||
| BadHttpRequestException.Throw(RequestRejectionReason | ||
| .MalformedRequestInvalidHeaders); | ||
| BadHttpRequestException.Throw(RequestRejectionReason.MalformedRequestInvalidHeaders); | ||
| } | ||
| throw; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,7 +45,14 @@ StringValues IHeaderDictionary.this[string key] | |
| { | ||
| ThrowHeadersReadOnlyException(); | ||
| } | ||
| SetValueFast(key, value); | ||
| if (value.Count == 0) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this a behavior change?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No because it doesn't output null headers; however it means the bit flags can be trusted. Currently if you set the header to a null value it will set the bit flag saying it has a value; rather than changing it to say it doesn't have a value
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. null string/Empty.StringValues/empty array
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Corner case would be if you set it to an array of null strings; then it would say it had a value with bit flag. However that's same as now (also it needs to know to clear it if its that). However if you add nulls; StringValues coalesces them to nothing so you'd explicitly need to use the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With dotnet/extensions#323 this changes to
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should wait for this before merging.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not flowed yet |
||
| { | ||
| RemoveFast(key); | ||
| } | ||
| else | ||
| { | ||
| SetValueFast(key, value); | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -164,7 +171,7 @@ void IDictionary<string, StringValues>.Add(string key, StringValues value) | |
| ThrowHeadersReadOnlyException(); | ||
| } | ||
|
|
||
| if (!AddValueFast(key, value)) | ||
| if (value.Count > 0 && !AddValueFast(key, value)) | ||
| { | ||
| ThrowDuplicateKeyException(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -426,45 +426,53 @@ public static string SchemeToString(HttpScheme scheme) | |
| } | ||
| } | ||
|
|
||
| public static bool IsValidHostHeader(string hostText) | ||
| public static void ValidateHostHeader(string hostText) | ||
| { | ||
| // The spec allows empty values | ||
| if (string.IsNullOrEmpty(hostText)) | ||
| // This is a string.IsNullOrEmpty test, but arranged to elmininate the | ||
| // bounds check from accessing the firstChar of the string | ||
| if (hostText is null || 0u >= (uint)hostText.Length) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a breaking change if you look at ValidateHostHeader in isolation since now string full of nothing but whitespace could be rejected. I think this is safe though since we trim whitespace from all request header values.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea, is stripped by header parsing
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @benaadams Does this faster than
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still not sure about
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is kinda funny looking... thinking about why
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could check the asm, it might; range check elimination misses obvious stuff though
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I went on sharplab.io to try some variations on this, and I have to admit that the conventional version is worse than I thought: Now I'm wondering if we should just make this optimization in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
coreclr is pretty shut down to any enhancements atm until next week; but at that point any additions will be to 2.2, so a 6 month wait?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also you need both sides to be
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just in case dotnet/coreclr#17512 |
||
| { | ||
| return true; | ||
| // The spec allows empty values | ||
| return; | ||
| } | ||
|
|
||
| if (hostText[0] == '[') | ||
| var firstChar = hostText[0]; | ||
| if (firstChar == '[') | ||
| { | ||
| return IsValidIPv6Host(hostText); | ||
| // Tail call | ||
| ValidateIPv6Host(hostText); | ||
| } | ||
|
|
||
| if (hostText[0] == ':') | ||
| else | ||
| { | ||
| // Only a port | ||
| return false; | ||
| } | ||
| if (firstChar == ':') | ||
| { | ||
| // Only a port | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
|
|
||
| var i = 0; | ||
| for (; i < hostText.Length; i++) | ||
| { | ||
| if (!IsValidHostChar(hostText[i])) | ||
| // Enregister array | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is enregister a technical term for changing a class reference to a local reference or something?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make sure it gets stored in a register
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Compiler term, put variable in register. I hadn't heard it before I strayed too close to the Jit |
||
| var hostCharValidity = HostCharValidity; | ||
| var i = 0; | ||
| for (; i < hostText.Length; i++) | ||
| { | ||
| break; | ||
| var ch = (int)hostText[i]; | ||
| // Bounds check and elimiate second bounds check | ||
| if ((uint)ch >= (uint)hostCharValidity.Length || !hostCharValidity[ch]) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How is this faster than IsValidHostCharf? Does the second bounds check not get eliminated when
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, was generating the asm while changing these and refining them. Perhaps
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also using a class level array |
||
| { | ||
| break; | ||
| } | ||
| } | ||
| } | ||
| return IsValidHostPort(hostText, i); | ||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static bool IsValidHostChar(char ch) | ||
| { | ||
| return ch < HostCharValidity.Length && HostCharValidity[ch]; | ||
| if (i < hostText.Length) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this condition getting repeated in ValidateHostPort? Why check twice?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could drop the check from
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah, is |
||
| { | ||
| // Tail call | ||
| ValidateHostPort(hostText, i); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // The lead '[' was already checked | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you removing this attribute from all the methods you expect to be TCO'd?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ish, making sure they are tail called (easy way, by not returning anything, no funky params and last action in function) But mostly its not inlining where there is no overall fast-path. So origin form is fully inlined, as a the char checks; but what's the right choice for the others? Is ipv6 via raw ip address a common scenerio? Is ports a common scenerio? (might be due to https on 443; but isn't for http on 80) |
||
| private static bool IsValidIPv6Host(string hostText) | ||
| private static void ValidateIPv6Host(string hostText) | ||
| { | ||
| for (var i = 1; i < hostText.Length; i++) | ||
| { | ||
|
|
@@ -474,58 +482,86 @@ private static bool IsValidIPv6Host(string hostText) | |
| // [::1] is the shortest valid IPv6 host | ||
| if (i < 4) | ||
| { | ||
| return false; | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
| else | ||
| { | ||
| // Tail call | ||
| ValidateHostPort(hostText, i + 1); | ||
| return; | ||
| } | ||
| return IsValidHostPort(hostText, i + 1); | ||
| } | ||
|
|
||
| if (!IsHex(ch) && ch != ':' && ch != '.') | ||
| { | ||
| return false; | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
| } | ||
|
|
||
| // Must contain a ']' | ||
| return false; | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static bool IsValidHostPort(string hostText, int offset) | ||
| private static void ValidateHostPort(string hostText, int offset) | ||
| { | ||
| if (offset == hostText.Length) | ||
| // Skip bounds check for accessing the [offset] element | ||
| if ((uint)offset >= (uint)hostText.Length) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the bounds check get repeated unless you cat offset to a uint? If so, that seems crazy. It seemed odd when the offset started as a char and it even seems more odd now that the offset is the int.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You have to check its not less than zero and also not greater than or equal to length; so two tests. Which is one test with the |
||
| { | ||
| return true; | ||
| return; | ||
| } | ||
|
|
||
| if (hostText[offset] != ':' || hostText.Length == offset + 1) | ||
| var firstChar = hostText[offset]; | ||
| offset++; | ||
| if (firstChar != ':' || (uint)offset >= (uint)hostText.Length) | ||
| { | ||
| // Must have at least one number after the colon if present. | ||
| return false; | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
|
|
||
| for (var i = offset + 1; i < hostText.Length; i++) | ||
| // This do+if check rather than for loop is to elimitate the bounds check, since | ||
| // the Jit doesn't currently pick up on it when starting at a variable offset | ||
| do | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think transforming simple for loops to complicated do/while loops is worthwhile when we can just wait for the JIT to handle this better.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be waiting a long time https://github.com/dotnet/coreclr/issues/15723#issuecomment-357152332
The issue here is its a range check per character; and when looking at the valid characters array that's another range check per character |
||
| { | ||
| if (!IsNumeric(hostText[i])) | ||
| // Elminate bounds check for array access | ||
| if ((uint)offset >= (uint)hostText.Length) | ||
| { | ||
| return false; | ||
| // Length reached, end of loop | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| return true; | ||
| var ch = hostText[offset]; | ||
| offset++; | ||
| if (!IsNumeric(ch)) | ||
| { | ||
| BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText); | ||
| } | ||
| } while (true); | ||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static bool IsNumeric(char ch) | ||
| { | ||
| return '0' <= ch && ch <= '9'; | ||
| // '0' <= ch && ch <= '9' | ||
| // (uint)(ch - '0') <= (uint)('9' - '0') | ||
|
|
||
| // Subtract start of range '0' | ||
| // Cast to uint to change negative numbers to large numbers | ||
| // Check if less than 10 representing chars '0' - '9' | ||
| return (uint)(ch - '0') < 10u; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we know that a subtraction then a comparison is faster than two comparisons? I ask because the earlier version certainly does read better which I guess is why you left it there as a comment. I have more or less the same question about the IsHex change. I know you've posted some nice results from the ResponseHeadersWritingBenchmark and the Http1ConnectionBenchmark, but I'm more interested in the performance of these methods in isolation.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its an extra branch prediction/mis-prediction per character; rather than the extra comparison that's the issue; cpu can only run with so many branches in parallel before you hit a pipeline bubble: Control_hazards_(branch_hazards)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How do you know changing
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No branch vs 1 branch?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense... I'd feel much more warm and fuzzy about it if there were some BenchmarkDotNet results to back up this theory.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have returned to my command center of my big screen monitors, so should be able to rummage something up. |
||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static bool IsHex(char ch) | ||
| { | ||
| return IsNumeric(ch) | ||
| || ('a' <= ch && ch <= 'f') | ||
| || ('A' <= ch && ch <= 'F'); | ||
| // || ('a' <= ch && ch <= 'f') | ||
| // || ('A' <= ch && ch <= 'F'); | ||
|
|
||
| // Lowercase indiscriminately (or with 32) | ||
| // Subtract start of range 'a' | ||
| // Cast to uint to change negative numbers to large numbers | ||
| // Check if less than 6 representing chars 'a' - 'f' | ||
| || (uint)((ch | 32) - 'a') < 6u; | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is reducing 4 branches to 1 branch |
||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool. Do we know for sure that the tail call is getting eliminated by the JIT? Or the C# compiler itself? If not, the comment could be a little misleading.