Skip to content
This repository was archived by the owner on Dec 18, 2018. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 27 additions & 14 deletions src/Kestrel.Core/Internal/Http/Http1Connection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Diagnostics;
using System.Globalization;
using System.IO.Pipelines;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
Expand Down Expand Up @@ -360,23 +361,38 @@ internal void EnsureHostHeaderExists()
// request message that contains more than one Host header field or a
// Host header field with an invalid field-value.

var host = HttpRequestHeaders.HeaderHost;
var hostText = host.ToString();
if (host.Count <= 0)
var hostCount = HttpRequestHeaders.HostCount;
var hostText = HttpRequestHeaders.HeaderHost.ToString();
if (hostCount <= 0)
{
if (_httpVersion == Http.HttpVersion.Http10)
{
return;
}
BadHttpRequestException.Throw(RequestRejectionReason.MissingHostHeader);
}
else if (host.Count > 1)
else if (hostCount > 1)
{
BadHttpRequestException.Throw(RequestRejectionReason.MultipleHostHeaders);
}
else if (_requestTargetForm == HttpRequestTarget.AuthorityForm)
else if (_requestTargetForm != HttpRequestTarget.OriginForm)
{
if (!host.Equals(RawTarget))
// Tail call
ValidateNonOrginHostHeader(hostText);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. Do we know for sure that the tail call is getting eliminated by the JIT? Or the C# compiler itself? If not, the comment could be a little misleading.

}
else
{
// Tail call
HttpUtilities.ValidateHostHeader(hostText);
}
}

[MethodImpl(MethodImplOptions.NoInlining)]
private void ValidateNonOrginHostHeader(string hostText)
{
if (_requestTargetForm == HttpRequestTarget.AuthorityForm)
{
if (hostText != RawTarget)
{
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}
Expand All @@ -390,20 +406,18 @@ internal void EnsureHostHeaderExists()

// System.Uri doesn't not tell us if the port was in the original string or not.
// When IsDefaultPort = true, we will allow Host: with or without the default port
if (host != _absoluteRequestTarget.Authority)
if (hostText != _absoluteRequestTarget.Authority)
{
if (!_absoluteRequestTarget.IsDefaultPort
|| host != _absoluteRequestTarget.Authority + ":" + _absoluteRequestTarget.Port.ToString(CultureInfo.InvariantCulture))
|| hostText != _absoluteRequestTarget.Authority + ":" + _absoluteRequestTarget.Port.ToString(CultureInfo.InvariantCulture))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Tratcher This has nothing to do with this PR, but if the problem here is "System.Uri doesn't not tell us if the port was in the original string or not", why not just compare hostText to RawTarget instead of doing this complicated condition?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HostText isn't expected to be the full value of RawTarget, only part of it.

{
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}
}
}

if (!HttpUtilities.IsValidHostHeader(hostText))
{
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}
// Tail call
HttpUtilities.ValidateHostHeader(hostText);
}

protected override void OnReset()
Expand Down Expand Up @@ -454,8 +468,7 @@ protected override bool TryParseRequest(ReadResult result, out bool endConnectio
{
if (_requestProcessingStatus == RequestProcessingStatus.ParsingHeaders)
{
BadHttpRequestException.Throw(RequestRejectionReason
.MalformedRequestInvalidHeaders);
BadHttpRequestException.Throw(RequestRejectionReason.MalformedRequestInvalidHeaders);
}
throw;
}
Expand Down
11 changes: 5 additions & 6 deletions src/Kestrel.Core/Internal/Http/Http1MessageBody.cs
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,10 @@ public static MessageBody For(
// see also http://tools.ietf.org/html/rfc2616#section-4.4
var keepAlive = httpVersion != HttpVersion.Http10;

var connection = headers.HeaderConnection;
var upgrade = false;
if (connection.Count > 0)
if (headers.HasConnection)
{
var connectionOptions = HttpHeaders.ParseConnection(connection);
var connectionOptions = HttpHeaders.ParseConnection(headers.HeaderConnection);

upgrade = (connectionOptions & ConnectionOptions.Upgrade) == ConnectionOptions.Upgrade;
keepAlive = (connectionOptions & ConnectionOptions.KeepAlive) == ConnectionOptions.KeepAlive;
Expand All @@ -233,10 +232,10 @@ public static MessageBody For(
return new ForUpgrade(context);
}

var transferEncoding = headers.HeaderTransferEncoding;
if (transferEncoding.Count > 0)
if (headers.HasTransferEncoding)
{
var transferCoding = HttpHeaders.GetFinalTransferCoding(headers.HeaderTransferEncoding);
var transferEncoding = headers.HeaderTransferEncoding;
var transferCoding = HttpHeaders.GetFinalTransferCoding(transferEncoding);

// https://tools.ietf.org/html/rfc7230#section-3.3.3
// If a Transfer-Encoding header field
Expand Down
11 changes: 11 additions & 0 deletions src/Kestrel.Core/Internal/Http/HttpHeaders.Generated.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ public partial class HttpRequestHeaders

private long _bits = 0;
private HeaderReferences _headers;

public bool HasConnection => (_bits & 2L) != 0;
public bool HasTransferEncoding => (_bits & 64L) != 0;

public int HostCount => _headers._Host.Count;

public StringValues HeaderCacheControl
{
Expand Down Expand Up @@ -4794,6 +4799,12 @@ public partial class HttpResponseHeaders

private long _bits = 0;
private HeaderReferences _headers;

public bool HasConnection => (_bits & 2L) != 0;
public bool HasDate => (_bits & 4L) != 0;
public bool HasTransferEncoding => (_bits & 64L) != 0;
public bool HasServer => (_bits & 33554432L) != 0;


public StringValues HeaderCacheControl
{
Expand Down
11 changes: 9 additions & 2 deletions src/Kestrel.Core/Internal/Http/HttpHeaders.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,14 @@ StringValues IHeaderDictionary.this[string key]
{
ThrowHeadersReadOnlyException();
}
SetValueFast(key, value);
if (value.Count == 0)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a behavior change?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No because it doesn't output null headers; however it means the bit flags can be trusted. Currently if you set the header to a null value it will set the bit flag saying it has a value; rather than changing it to say it doesn't have a value

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

null string/Empty.StringValues/empty array

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Corner case would be if you set it to an array of null strings; then it would say it had a value with bit flag. However that's same as now (also it needs to know to clear it if its that).

However if you add nulls; StringValues coalesces them to nothing so you'd explicitly need to use the new StringValues(string[]) constructor

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With dotnet/extensions#323 this changes to

value.IsNull

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should wait for this before merging.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not flowed yet

{
RemoveFast(key);
}
else
{
SetValueFast(key, value);
}
}
}

Expand Down Expand Up @@ -164,7 +171,7 @@ void IDictionary<string, StringValues>.Add(string key, StringValues value)
ThrowHeadersReadOnlyException();
}

if (!AddValueFast(key, value))
if (value.Count > 0 && !AddValueFast(key, value))
{
ThrowDuplicateKeyException();
}
Expand Down
4 changes: 2 additions & 2 deletions src/Kestrel.Core/Internal/Http/HttpProtocol.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,6 @@ private void CreateResponseHeader(bool appCompleted)
var hasConnection = responseHeaders.HasConnection;
var connectionOptions = HttpHeaders.ParseConnection(responseHeaders.HeaderConnection);
var hasTransferEncoding = responseHeaders.HasTransferEncoding;
var transferCoding = HttpHeaders.GetFinalTransferCoding(responseHeaders.HeaderTransferEncoding);

if (_keepAlive && hasConnection && (connectionOptions & ConnectionOptions.KeepAlive) != ConnectionOptions.KeepAlive)
{
Expand All @@ -1122,7 +1121,8 @@ private void CreateResponseHeader(bool appCompleted)
// chunked is applied to a response payload body, the sender MUST either
// apply chunked as the final transfer coding or terminate the message
// by closing the connection.
if (hasTransferEncoding && transferCoding != TransferCoding.Chunked)
if (hasTransferEncoding &&
HttpHeaders.GetFinalTransferCoding(responseHeaders.HeaderTransferEncoding) != TransferCoding.Chunked)
{
_keepAlive = false;
}
Expand Down
8 changes: 0 additions & 8 deletions src/Kestrel.Core/Internal/Http/HttpResponseHeaders.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,6 @@ public partial class HttpResponseHeaders : HttpHeaders
private static readonly byte[] _CrLf = new[] { (byte)'\r', (byte)'\n' };
private static readonly byte[] _colonSpace = new[] { (byte)':', (byte)' ' };

public bool HasConnection => HeaderConnection.Count != 0;

public bool HasTransferEncoding => HeaderTransferEncoding.Count != 0;

public bool HasServer => HeaderServer.Count != 0;

public bool HasDate => HeaderDate.Count != 0;

public Enumerator GetEnumerator()
{
return new Enumerator(this);
Expand Down
5 changes: 1 addition & 4 deletions src/Kestrel.Core/Internal/Http2/Http2Stream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,7 @@ protected override bool TryParseRequest(ReadResult result, out bool endConnectio
}

var hostText = host.ToString();
if (!HttpUtilities.IsValidHostHeader(hostText))
{
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}
HttpUtilities.ValidateHostHeader(hostText);

endConnection = false;
return true;
Expand Down
122 changes: 79 additions & 43 deletions src/Kestrel.Core/Internal/Infrastructure/HttpUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -426,45 +426,53 @@ public static string SchemeToString(HttpScheme scheme)
}
}

public static bool IsValidHostHeader(string hostText)
public static void ValidateHostHeader(string hostText)
{
// The spec allows empty values
if (string.IsNullOrEmpty(hostText))
// This is a string.IsNullOrEmpty test, but arranged to eliminate the
// bounds check from accessing the firstChar of the string
if (hostText is null || 0u >= (uint)hostText.Length)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a breaking change if you look at ValidateHostHeader in isolation, since a string full of nothing but whitespace could now be rejected. I think this is safe, though, since we trim whitespace from all request header values.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, is stripped by header parsing

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@benaadams Is this faster than if (hostText == null || hostText.Length == 0)? If so, why? If not, why write it like this?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still not sure about is vs ==, but I remember something about the bounds check elimination only happening with uint comparisons. It seems to me the JIT should be able to eliminate the bounds check given the more conventional if condition. I'd just wait for the JIT to fix this rather than leave these unconventional conditions around. I mean, if we did leave them around, how long should we continue cargo culting the pattern?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is kinda funny looking... thinking about why

Copy link
Copy Markdown
Contributor Author

@benaadams benaadams Apr 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could check the asm, it might; range check elimination misses obvious stuff though

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went on sharplab.io to try some variations on this, and I have to admit that the conventional version is worse than I thought:

public static bool StartsWithBracket(string hostText)
{
    if (hostText == null || 0u >= (uint)hostText.Length)
    {
        return false;
    }

    var firstChar = hostText[0];
    if (firstChar == '[')
    {
        return true;
    }
    
    return false;
}

; Desktop CLR v4.7.2563.00 (clr.dll) on amd64.

Program.StartsWithBracket(System.String)
    L0000: test rcx, rcx
    L0003: jz L000c
    L0005: mov eax, [rcx+0x8]
    L0008: test eax, eax
    L000a: jnz L000f
    L000c: xor eax, eax
    L000e: ret
    L000f: cmp word [rcx+0xc], 0x5b
    L0014: jnz L001c
    L0016: mov eax, 0x1
    L001b: ret
    L001c: xor eax, eax
    L001e: ret
public static bool StartsWithBracket(string hostText)
{
    if (hostText == null || hostText.Length == 0)
    {
        return false;
    }

    var firstChar = hostText[0];
    if (firstChar == '[')
    {
        return true;
    }
    
    return false;
}

; Desktop CLR v4.7.2563.00 (clr.dll) on amd64.

Program.StartsWithBracket(System.String)
    L0000: sub rsp, 0x28
    L0004: test rcx, rcx
    L0007: jz L0010
    L0009: mov edx, [rcx+0x8]
    L000c: test edx, edx
    L000e: jnz L0017
    L0010: xor eax, eax
    L0012: add rsp, 0x28
    L0016: ret
    L0017: cmp edx, 0x0
    L001a: jbe L0034
    L001c: cmp word [rcx+0xc], 0x5b
    L0021: jnz L002d
    L0023: mov eax, 0x1
    L0028: add rsp, 0x28
    L002c: ret
    L002d: xor eax, eax
    L002f: add rsp, 0x28
    L0033: ret
    L0034: call 0x7ffaff4223c0
    L0039: int3
public static bool StartsWithBracket(string hostText)
{
    if (string.IsNullOrEmpty(hostText))
    {
        return false;
    }

    var firstChar = hostText[0];
    if (firstChar == '[')
    {
        return true;
    }
    
    return false;
}


; Desktop CLR v4.7.2563.00 (clr.dll) on amd64.

Program.StartsWithBracket(System.String)
    L0000: sub rsp, 0x28
    L0004: test rcx, rcx
    L0007: jz L0017
    L0009: cmp dword [rcx+0x8], 0x0
    L000d: setz al
    L0010: movzx eax, al
    L0013: test eax, eax
    L0015: jz L001e
    L0017: xor eax, eax
    L0019: add rsp, 0x28
    L001d: ret
    L001e: cmp dword [rcx+0x8], 0x0
    L0022: jbe L003c
    L0024: cmp word [rcx+0xc], 0x5b
    L0029: jnz L0035
    L002b: mov eax, 0x1
    L0030: add rsp, 0x28
    L0034: ret
    L0035: xor eax, eax
    L0037: add rsp, 0x28
    L003b: ret
    L003c: call 0x7ffaff4223c0
    L0041: int3

Now I'm wondering if we should just make this optimization in string.IsNullOrEmpty if it's going to take a while for the JIT to optimize this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I'm wondering if we should just make this optimization in string.IsNullOrEmpty if it's going to take a while for the JIT to optimize this.

coreclr is pretty shut down to any enhancements atm until next week; but at that point any additions will be to 2.2, so a 6 month wait?

Copy link
Copy Markdown
Contributor Author

@benaadams benaadams Apr 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also you need both sides to be uint, as a uint to int comparison will first upcast them to long; then do a long >= long comparison :-/

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just in case dotnet/coreclr#17512

{
return true;
// The spec allows empty values
return;
}

if (hostText[0] == '[')
var firstChar = hostText[0];
if (firstChar == '[')
{
return IsValidIPv6Host(hostText);
// Tail call
ValidateIPv6Host(hostText);
}

if (hostText[0] == ':')
else
{
// Only a port
return false;
}
if (firstChar == ':')
{
// Only a port
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}

var i = 0;
for (; i < hostText.Length; i++)
{
if (!IsValidHostChar(hostText[i]))
// Enregister array
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is enregister a technical term for changing a class reference to a local reference or something?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sure it gets stored in a register

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compiler term, put variable in register. I hadn't heard it before I strayed too close to the Jit

var hostCharValidity = HostCharValidity;
var i = 0;
for (; i < hostText.Length; i++)
{
break;
var ch = (int)hostText[i];
// Bounds check and eliminate second bounds check
if ((uint)ch >= (uint)hostCharValidity.Length || !hostCharValidity[ch])
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this faster than IsValidHostChar? Does the second bounds check not get eliminated when ch is a char?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, was generating the asm while changing these and refining them.

Perhaps char isn't treated as an unsigned value (e.g. short as ushort isn't CLS-compliant) so ch < HostCharValidity.Length could pass with a negative char?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also using a class level array HostCharValidity rather than a local array hostCharValidity can stop bounds checks being eliminated; and they recently made elimination more conservative dotnet/coreclr#15756

{
break;
}
}
}
return IsValidHostPort(hostText, i);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsValidHostChar(char ch)
{
return ch < HostCharValidity.Length && HostCharValidity[ch];
if (i < hostText.Length)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this condition getting repeated in ValidateHostPort? Why check twice?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could drop the check from ValidateHostPort; but then its just going to introduce a bounds check and check it anyway, so might as well check it there for safety?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, there is a while loop there anyway, so it needs the check for the loop. Not sure how to get rid of one of them?

{
// Tail call
ValidateHostPort(hostText, i);
}
}
}

// The lead '[' was already checked
[MethodImpl(MethodImplOptions.AggressiveInlining)]
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you removing this attribute from all the methods you expect to be TCO'd?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ish, making sure they are tail called (easy way, by not returning anything, no funky params and last action in function)

But mostly it's not inlining where there is no overall fast-path. So origin form is fully inlined, as are the char checks; but what's the right choice for the others? Is IPv6 via raw IP address a common scenario? Are ports a common scenario? (might be due to https on 443; but isn't for http on 80)

private static bool IsValidIPv6Host(string hostText)
private static void ValidateIPv6Host(string hostText)
{
for (var i = 1; i < hostText.Length; i++)
{
Expand All @@ -474,58 +482,86 @@ private static bool IsValidIPv6Host(string hostText)
// [::1] is the shortest valid IPv6 host
if (i < 4)
{
return false;
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}
else
{
// Tail call
ValidateHostPort(hostText, i + 1);
return;
}
return IsValidHostPort(hostText, i + 1);
}

if (!IsHex(ch) && ch != ':' && ch != '.')
{
return false;
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}
}

// Must contain a ']'
return false;
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsValidHostPort(string hostText, int offset)
private static void ValidateHostPort(string hostText, int offset)
{
if (offset == hostText.Length)
// Skip bounds check for accessing the [offset] element
if ((uint)offset >= (uint)hostText.Length)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the bounds check gets repeated unless you cast offset to a uint? If so, that seems crazy. It seemed odd when the offset started as a char and it seems even more odd now that the offset is an int.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have to check its not less than zero and also not greater than or equal to length; so two tests. Which is one test with the uint. Also I think if you do it in two tests, it doesn't work - outside of for loops with a constant start.

{
return true;
return;
}

if (hostText[offset] != ':' || hostText.Length == offset + 1)
var firstChar = hostText[offset];
offset++;
if (firstChar != ':' || (uint)offset >= (uint)hostText.Length)
{
// Must have at least one number after the colon if present.
return false;
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}

for (var i = offset + 1; i < hostText.Length; i++)
// This do+if check rather than for loop is to eliminate the bounds check, since
// the Jit doesn't currently pick up on it when starting at a variable offset
do
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think transforming simple for loops to complicated do/while loops is worthwhile when we can just wait for the JIT to handle this better.

Copy link
Copy Markdown
Contributor Author

@benaadams benaadams Mar 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be waiting a long time https://github.com/dotnet/coreclr/issues/15723#issuecomment-357152332

it's probably going to take a while, the rangecheck phase is a bit of a mess and it is difficult and risky to extend it to handle new patterns.

The issue here is its a range check per character; and when looking at the valid characters array that's another range check per character

{
if (!IsNumeric(hostText[i]))
// Eliminate bounds check for array access
if ((uint)offset >= (uint)hostText.Length)
{
return false;
// Length reached, end of loop
break;
}
}

return true;
var ch = hostText[offset];
offset++;
if (!IsNumeric(ch))
{
BadHttpRequestException.Throw(RequestRejectionReason.InvalidHostHeader, hostText);
}
} while (true);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsNumeric(char ch)
{
return '0' <= ch && ch <= '9';
// '0' <= ch && ch <= '9'
// (uint)(ch - '0') <= (uint)('9' - '0')

// Subtract start of range '0'
// Cast to uint to change negative numbers to large numbers
// Check if less than 10 representing chars '0' - '9'
return (uint)(ch - '0') < 10u;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we know that a subtraction then a comparison is faster than two comparisons? I ask because the earlier version certainly does read better which I guess is why you left it there as a comment. I have more or less the same question about the IsHex change.

I know you've posted some nice results from the ResponseHeadersWritingBenchmark and the Http1ConnectionBenchmark, but I'm more interested in the performance of these methods in isolation.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's an extra branch prediction/mis-prediction per character, rather than the extra comparison, that's the issue; the CPU can only run with so many branches in parallel before you hit a pipeline bubble: Control_hazards_(branch_hazards)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you know changing '0' <= ch && ch <= '9' to (uint)(ch - '0') < 10u improves the branch prediction?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No branch vs 1 branch?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense... I'd feel much more warm and fuzzy about it if there were some BenchmarkDotNet results to back up this theory.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have returned to my command center of my big screen monitors, so should be able to rummage something up.

}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsHex(char ch)
{
return IsNumeric(ch)
|| ('a' <= ch && ch <= 'f')
|| ('A' <= ch && ch <= 'F');
// || ('a' <= ch && ch <= 'f')
// || ('A' <= ch && ch <= 'F');

// Lowercase indiscriminately (or with 32)
// Subtract start of range 'a'
// Cast to uint to change negative numbers to large numbers
// Check if less than 6 representing chars 'a' - 'f'
|| (uint)((ch | 32) - 'a') < 6u;
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is reducing 4 branches to 1 branch

}
}
}
Loading