diff --git a/src/libraries/System.Private.Uri/src/System/IriHelper.cs b/src/libraries/System.Private.Uri/src/System/IriHelper.cs index 2b7013ac8e430b..c5aaeb958596a6 100644 --- a/src/libraries/System.Private.Uri/src/System/IriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/IriHelper.cs @@ -69,51 +69,46 @@ internal static bool CheckIriUnicodeRange(uint value, bool isQuery) public static bool IsInInclusiveRange(uint value, uint min, uint max) => (value - min) <= (max - min); - // // IRI normalization for strings containing characters that are not allowed or // escaped characters that should be unescaped in the context of the specified Uri component. - // - internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, bool isQuery) + internal static string EscapeUnescapeIri(ReadOnlySpan span, bool isQuery) { - Debug.Assert(end >= 0 && start >= 0 && start <= end); - - int size = end - start; - var dest = size <= Uri.StackallocThreshold + var dest = span.Length <= Uri.StackallocThreshold ? new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]) - : new ValueStringBuilder(size); + : new ValueStringBuilder(span.Length); Span maxUtf8EncodedSpan = stackalloc byte[4]; - for (int i = start; i < end; ++i) + for (int i = 0; (uint)i < (uint)span.Length; i++) { - char ch = pInput[i]; + char ch = span[i]; + if (ch == '%') { - if (end - i > 2) + if ((uint)(i + 2) < (uint)span.Length) { - ch = UriHelper.DecodeHexChars(pInput[i + 1], pInput[i + 2]); + ch = UriHelper.DecodeHexChars(span[i + 1], span[i + 2]); // Do not unescape a reserved char if (ch == Uri.c_DummyChar || UriHelper.IsNotSafeForUnescape(ch)) { // keep as is - dest.Append(pInput[i++]); - dest.Append(pInput[i++]); - dest.Append(pInput[i]); - continue; + dest.Append(span[i]); + dest.Append(span[i + 1]); + dest.Append(span[i + 2]); + i += 2; } else if (ch <= '\x7F') { // ASCII dest.Append(ch); i += 2; - continue; } else { // possibly utf8 encoded sequence of unicode int charactersRead = PercentEncodingHelper.UnescapePercentEncodedUTF8Sequence( - new ReadOnlySpan(pInput + i, end - i), + span.Slice(i), ref dest, isQuery, iriParsing: true); @@ -121,10 +116,8 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end Debug.Assert(charactersRead > 0); i += charactersRead - 1; // -1 as i will be incremented in the loop } - } - else - { - dest.Append(pInput[i]); + + continue; } } else if (ch > '\x7f') @@ -136,9 +129,9 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end char ch2 = '\0'; - if ((char.IsHighSurrogate(ch)) && (i + 1 < end)) + if ((char.IsHighSurrogate(ch)) && (uint)(i + 1) < (uint)span.Length) { - ch2 = pInput[i + 1]; + ch2 = span[i + 1]; isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, isQuery); } else @@ -179,12 +172,12 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end { i++; } + + continue; } - else - { - // ASCII, just copy the character - dest.Append(pInput[i]); - } + + // ASCII, just copy the character + dest.Append(ch); } return dest.ToString(); diff --git a/src/libraries/System.Private.Uri/src/System/UncNameHelper.cs b/src/libraries/System.Private.Uri/src/System/UncNameHelper.cs index 54cec4103e4077..d541ca4896b47b 100644 --- a/src/libraries/System.Private.Uri/src/System/UncNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/UncNameHelper.cs @@ -36,33 +36,27 @@ public static string ParseCanonicalName(string str, int start, int end, ref bool // // Assumption is the caller will check on the resulting name length // Remarks: MUST NOT be used unless all input indexes are verified and trusted. - public static unsafe bool IsValid(char* name, int start, ref int returnedEnd, bool notImplicitFile) + public static bool IsValid(ReadOnlySpan name, bool notImplicitFile, out int nameLength) { - int end = returnedEnd; + nameLength = 0; - if (start == end) - return false; - // // First segment could consist of only '_' or '-' but it cannot be all digits or empty - // bool validShortName = false; - int i = start; - for (; i < end; ++i) + int i = 0; + for (; i < name.Length; i++) { - if (name[i] == '/' || name[i] == '\\' || (notImplicitFile && (name[i] == ':' || name[i] == '?' || name[i] == '#'))) + if (char.IsLetter(name[i]) || name[i] == '-' || name[i] == '_') { - end = i; - break; + validShortName = true; } - else if (name[i] == '.') + else if (name[i] == '/' || name[i] == '\\' || (notImplicitFile && (name[i] == ':' || name[i] == '?' || name[i] == '#'))) { - ++i; break; } - - if (char.IsLetter(name[i]) || name[i] == '-' || name[i] == '_') + else if (name[i] == '.') { - validShortName = true; + i++; + break; } else if (!char.IsAsciiDigit(name[i])) { @@ -73,20 +67,17 @@ public static unsafe bool IsValid(char* name, int start, ref int returnedEnd, bo if (!validShortName) return false; - // // Subsequent segments must start with a letter or a digit - // - for (; i < end; ++i) + for (; (uint)i < (uint)name.Length; i++) { if (name[i] == '/' || name[i] == '\\' || (notImplicitFile && (name[i] == ':' || name[i] == '?' || name[i] == '#'))) { - end = i; break; } else if (name[i] == '.') { - if (!validShortName || ((i - 1) >= start && name[i - 1] == '.')) + if (!validShortName || name[i - 1] == '.') return false; validShortName = false; @@ -98,23 +89,25 @@ public static unsafe bool IsValid(char* name, int start, ref int returnedEnd, bo } else if (char.IsLetter(name[i]) || char.IsAsciiDigit(name[i])) { - if (!validShortName) - validShortName = true; + validShortName = true; } else + { return false; + } } - // last segment can end with the dot - if (((i - 1) >= start && name[i - 1] == '.')) - validShortName = true; - if (!validShortName) - return false; - - // caller must check for (end - start <= MaximumInternetNameLength) + { + // last segment can end with the dot + if ((uint)(i - 1) >= (uint)name.Length || name[i - 1] != '.') + { + return false; + } + } - returnedEnd = end; + // Caller must check that (nameLength <= MaximumInternetNameLength) + nameLength = i; return true; } } diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index 86d2dd08a7daa3..f9e3859410a89f 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -1993,8 +1993,6 @@ private static ParsingError ParseScheme(string uriString, ref Flags flags, ref U return GetException(result); } - // - // // This method tries to parse the minimal information needed to certify the validity // of a uri string // @@ -2003,163 +2001,139 @@ private static ParsingError ParseScheme(string uriString, ref Flags flags, ref U // The method must be called only at the .ctor time // // Returns ParsingError.None if the Uri syntax is valid, an error otherwise - // - private unsafe ParsingError PrivateParseMinimal() + private ParsingError PrivateParseMinimal() { Debug.Assert(_syntax != null); DebugAssertInCtor(); + Debug.Assert(ReferenceEquals(_string, OriginalString)); int idx = (int)(_flags & Flags.IndexMask); - int length = _string.Length; // Means a custom UriParser did call "base" InitializeAndValidate() _flags &= ~(Flags.IndexMask | Flags.UserDrivenParsing); - //STEP2: Parse up to the port + ReadOnlySpan str = _string; - Debug.Assert(ReferenceEquals(_string, OriginalString)); + // Trim any trailing spaces + while (!str.IsEmpty && UriHelper.IsLWS(str[str.Length - 1])) + { + str = str.Slice(0, str.Length - 1); + } - fixed (char* pUriString = _string) + // Unix Path + if (!OperatingSystem.IsWindows() && InFact(Flags.UnixPath)) { - // Cut trailing spaces in _string - if (length > idx && UriHelper.IsLWS(pUriString[length - 1])) - { - --length; - while (length != idx && UriHelper.IsLWS(pUriString[--length])) - ; - ++length; - } + _flags |= Flags.BasicHostType; + _flags |= (Flags)idx; + return ParsingError.None; + } + + // Old Uri parser tries to figure out on a DosPath in all cases. + // Hence http://c:/ is treated as DosPath without the host while it should be a host "c", port 80 + // + // This block is compatible with Old Uri parser in terms it will look for the DosPath if the scheme + // syntax allows both empty hostnames and DosPath + if (_syntax.IsAllSet(UriSyntaxFlags.AllowEmptyHost | UriSyntaxFlags.AllowDOSPath) && + NotAny(Flags.ImplicitFile) && + (idx + 1 < str.Length)) + { + int i = idx; - // Unix Path - if (!OperatingSystem.IsWindows() && InFact(Flags.UnixPath)) + // V1 Compat: Allow _compression_ of > 3 slashes only for File scheme. + // This will skip all slashes and if their number is 2+ it sets the AuthorityFound flag + for (; (uint)i < (uint)str.Length; i++) { - _flags |= Flags.BasicHostType; - _flags |= (Flags)idx; - return ParsingError.None; + if (str[i] is not ('\\' or '/')) + { + break; + } } - // Old Uri parser tries to figure out on a DosPath in all cases. - // Hence http://c:/ is treated as DosPath without the host while it should be a host "c", port 80 - // - // This block is compatible with Old Uri parser in terms it will look for the DosPath if the scheme - // syntax allows both empty hostnames and DosPath - // - if (_syntax.IsAllSet(UriSyntaxFlags.AllowEmptyHost | UriSyntaxFlags.AllowDOSPath) - && NotAny(Flags.ImplicitFile) && (idx + 1 < length)) + if (_syntax.InFact(UriSyntaxFlags.FileLikeUri) || i - idx <= 3) { - char c; - int i = idx; - - // V1 Compat: Allow _compression_ of > 3 slashes only for File scheme. - // This will skip all slashes and if their number is 2+ it sets the AuthorityFound flag - for (; i < length; ++i) + // if more than one slash after the scheme, the authority is present + if (i - idx >= 2) { - if (!((c = pUriString[i]) == '\\' || c == '/')) - break; + _flags |= Flags.AuthorityFound; } - if (_syntax.InFact(UriSyntaxFlags.FileLikeUri) || i - idx <= 3) + // DOS-like path? + if ((uint)(i + 1) < (uint)str.Length && str[i + 1] is ':' or '|' && char.IsAsciiLetter(str[i])) { - // if more than one slash after the scheme, the authority is present - if (i - idx >= 2) + if ((uint)(i + 2) >= (uint)str.Length || str[i + 2] is not ('\\' or '/')) { - _flags |= Flags.AuthorityFound; + // report an error but only for a file: scheme + if (_syntax.InFact(UriSyntaxFlags.FileLikeUri)) + return ParsingError.MustRootedPath; } - // DOS-like path? - if (i + 1 < length && ((c = pUriString[i + 1]) == ':' || c == '|') && - char.IsAsciiLetter(pUriString[i])) + else { - if (i + 2 >= length || ((c = pUriString[i + 2]) != '\\' && c != '/')) + // This will set IsDosPath + _flags |= Flags.DosPath; + + if (_syntax.InFact(UriSyntaxFlags.MustHaveAuthority)) + { + // when DosPath found and Authority is required, set this flag even if Authority is empty + _flags |= Flags.AuthorityFound; + } + if (i != idx && i - idx != 2) { - // report an error but only for a file: scheme - if (_syntax.InFact(UriSyntaxFlags.FileLikeUri)) - return ParsingError.MustRootedPath; + //This will remember that DosPath is rooted + idx = i - 1; } else { - // This will set IsDosPath - _flags |= Flags.DosPath; - - if (_syntax.InFact(UriSyntaxFlags.MustHaveAuthority)) - { - // when DosPath found and Authority is required, set this flag even if Authority is empty - _flags |= Flags.AuthorityFound; - } - if (i != idx && i - idx != 2) - { - //This will remember that DosPath is rooted - idx = i - 1; - } - else - { - idx = i; - } + idx = i; } } - // UNC share? - else if (_syntax.InFact(UriSyntaxFlags.FileLikeUri) && (i - idx >= 2 && i - idx != 3 && - i < length && pUriString[i] != '?' && pUriString[i] != '#')) - { - // V1.0 did not support file:///, fixing it with minimal behavior change impact - // Only FILE scheme may have UNC Path flag set - _flags |= Flags.UncPath; - idx = i; - } - else if (!OperatingSystem.IsWindows() && _syntax.InFact(UriSyntaxFlags.FileLikeUri) && pUriString[i - 1] == '/' && i - idx == 3) - { - _syntax = UriParser.UnixFileUri; - _flags |= Flags.UnixPath | Flags.AuthorityFound; - idx += 2; - } } - } - // - //STEP 1.5 decide on the Authority component - // - if ((_flags & (Flags.UncPath | Flags.DosPath | Flags.UnixPath)) != 0) - { - } - else if ((idx + 2) <= length) - { - char first = pUriString[idx]; - char second = pUriString[idx + 1]; - - if (_syntax.InFact(UriSyntaxFlags.MustHaveAuthority)) + // UNC share? + else if (_syntax.InFact(UriSyntaxFlags.FileLikeUri) && + i - idx >= 2 && i - idx != 3 && + (uint)i < (uint)str.Length && str[i] is not ('?' or '#')) { - // (V1.0 compatibility) This will allow http:\\ http:\/ http:/\ - if ((first == '/' || first == '\\') && (second == '/' || second == '\\')) - { - _flags |= Flags.AuthorityFound; - idx += 2; - } - else - { - return ParsingError.BadAuthority; - } + // V1.0 did not support file:///, fixing it with minimal behavior change impact + // Only FILE scheme may have UNC Path flag set + _flags |= Flags.UncPath; + idx = i; } - else if (_syntax.InFact(UriSyntaxFlags.OptionalAuthority) && (InFact(Flags.AuthorityFound) || - (first == '/' && second == '/'))) + else if (!OperatingSystem.IsWindows() && _syntax.InFact(UriSyntaxFlags.FileLikeUri) && str[i - 1] == '/' && i - idx == 3) + { + _syntax = UriParser.UnixFileUri; + _flags |= Flags.UnixPath | Flags.AuthorityFound; + idx += 2; + } + } + } + + // Decide on the Authority component + if ((_flags & (Flags.UncPath | Flags.DosPath | Flags.UnixPath)) != 0) + { + + } + else if ((uint)(idx + 1) < (uint)str.Length) + { + char first = str[idx]; + char second = str[idx + 1]; + + if (_syntax.InFact(UriSyntaxFlags.MustHaveAuthority)) + { + // (V1.0 compatibility) This will allow http:\\ http:\/ http:/\ + if ((first == '/' || first == '\\') && (second == '/' || second == '\\')) { _flags |= Flags.AuthorityFound; idx += 2; } - // There is no Authority component, save the Path index - // Ideally we would treat mailto like any other URI, but for historical reasons we have to separate out its host parsing. - else if (_syntax.NotAny(UriSyntaxFlags.MailToLikeUri)) + else { - // By now we know the URI has no Authority, so if the URI must be normalized, initialize it without one. - if (InFact(Flags.HasUnicode)) - { - _string = _string.Substring(0, idx); - } - // Since there is no Authority, the path index is just the end of the scheme. - _flags |= ((Flags)idx | Flags.UnknownHostType); - return ParsingError.None; + return ParsingError.BadAuthority; } } - else if (_syntax.InFact(UriSyntaxFlags.MustHaveAuthority)) + else if (_syntax.InFact(UriSyntaxFlags.OptionalAuthority) && (InFact(Flags.AuthorityFound) || + (first == '/' && second == '/'))) { - return ParsingError.BadAuthority; + _flags |= Flags.AuthorityFound; + idx += 2; } // There is no Authority component, save the Path index // Ideally we would treat mailto like any other URI, but for historical reasons we have to separate out its host parsing. @@ -2174,62 +2148,78 @@ private unsafe ParsingError PrivateParseMinimal() _flags |= ((Flags)idx | Flags.UnknownHostType); return ParsingError.None; } - - // vsmacros://c:\path\file - // Note that two slashes say there must be an Authority but instead the path goes - // Fro V1 compat the next block allow this case but not for schemes like http - if (InFact(Flags.DosPath)) + } + else if (_syntax.InFact(UriSyntaxFlags.MustHaveAuthority)) + { + return ParsingError.BadAuthority; + } + // There is no Authority component, save the Path index + // Ideally we would treat mailto like any other URI, but for historical reasons we have to separate out its host parsing. + else if (_syntax.NotAny(UriSyntaxFlags.MailToLikeUri)) + { + // By now we know the URI has no Authority, so if the URI must be normalized, initialize it without one. + if (InFact(Flags.HasUnicode)) { - _flags |= (((_flags & Flags.AuthorityFound) != 0) ? Flags.BasicHostType : Flags.UnknownHostType); - _flags |= (Flags)idx; - return ParsingError.None; + _string = _string.Substring(0, idx); } + // Since there is no Authority, the path index is just the end of the scheme. + _flags |= ((Flags)idx | Flags.UnknownHostType); + return ParsingError.None; + } - //STEP 2: Check the syntax of authority expecting at least one character in it - // - // Note here we do know that there is an authority in the string OR it's a DOS path + // vsmacros://c:\path\file + // Note that two slashes say there must be an Authority but instead the path goes + // Fro V1 compat the next block allow this case but not for schemes like http + if (InFact(Flags.DosPath)) + { + _flags |= (((_flags & Flags.AuthorityFound) != 0) ? Flags.BasicHostType : Flags.UnknownHostType); + _flags |= (Flags)idx; + return ParsingError.None; + } - // We may find a userInfo and the port when parsing an authority - // Also we may find a registry based authority. - // We must ensure that known schemes do use a server-based authority - { - ParsingError err = ParsingError.None; - string? newHost = null; // stores newly parsed host when original strings are being switched + //STEP 2: Check the syntax of authority expecting at least one character in it + // + // Note here we do know that there is an authority in the string OR it's a DOS path - idx = CheckAuthorityHelper(pUriString, idx, length, ref err, ref _flags, _syntax, ref newHost); - if (err != ParsingError.None) - return err; + // We may find a userInfo and the port when parsing an authority + // Also we may find a registry based authority. + // We must ensure that known schemes do use a server-based authority + { + string? newHost = null; // stores newly parsed host when original strings are being switched - if (idx < length) - { - char hostTerminator = pUriString[idx]; + idx = CheckAuthorityHelper(str, idx, out ParsingError err, ref _flags, _syntax.Flags, ref newHost); + if (err != ParsingError.None) + return err; - // This will disallow '\' as the host terminator for any scheme that is not implicitFile or cannot have a Dos Path - if (hostTerminator == '\\' && NotAny(Flags.ImplicitFile) && _syntax.NotAny(UriSyntaxFlags.AllowDOSPath)) - { - return ParsingError.BadAuthorityTerminator; - } - // When the hostTerminator is '/' on Unix, use the UnixFile syntax (preserve backslashes) - else if (!OperatingSystem.IsWindows() && hostTerminator == '/' && NotAny(Flags.ImplicitFile) && InFact(Flags.UncPath) && _syntax == UriParser.FileUri) - { - _syntax = UriParser.UnixFileUri; - } - } + if ((uint)idx < (uint)str.Length) + { + char hostTerminator = str[idx]; - if (newHost is not null) + // This will disallow '\' as the host terminator for any scheme that is not implicitFile or cannot have a Dos Path + if (hostTerminator == '\\' && NotAny(Flags.ImplicitFile) && _syntax.NotAny(UriSyntaxFlags.AllowDOSPath)) + { + return ParsingError.BadAuthorityTerminator; + } + // When the hostTerminator is '/' on Unix, use the UnixFile syntax (preserve backslashes) + else if (!OperatingSystem.IsWindows() && hostTerminator == '/' && NotAny(Flags.ImplicitFile) && InFact(Flags.UncPath) && _syntax == UriParser.FileUri) { - _string = newHost; + _syntax = UriParser.UnixFileUri; } } - // The Path (or Port) parsing index is reloaded on demand in CreateUriInfo when accessing a Uri property - _flags |= (Flags)idx; - - // The rest of the string will be parsed on demand - // The Host/Authority is all checked, the type is known but the host value string - // is not created/canonicalized at this point. + if (newHost is not null) + { + _string = newHost; + } } + // The Path (or Port) parsing index is reloaded on demand in CreateUriInfo when accessing a Uri property + _flags |= (Flags)idx; + + // The rest of the string will be parsed on demand + // The Host/Authority is all checked, the type is known but the host value string + // is not created/canonicalized at this point. + return ParsingError.None; } @@ -2571,7 +2561,7 @@ private static string CreateHostStringHelper(string str, int idx, int end, ref F // // Called under lock() // - private unsafe void GetHostViaCustomSyntax() + private void GetHostViaCustomSyntax() { // A multithreading check if (_info.Host != null) @@ -2582,22 +2572,11 @@ private unsafe void GetHostViaCustomSyntax() // ATTN: Check on whether recursion has not happened if (_info.Host is null) { - ParsingError err = ParsingError.None; Flags flags = _flags & ~Flags.HostTypeMask; + string? newHost = null; - fixed (char* pHost = host) - { - string? newHost = null; - if (CheckAuthorityHelper(pHost, 0, host.Length, ref err, ref flags, _syntax, ref newHost) != - host.Length) - { - // We cannot parse the entire host string - flags &= ~Flags.HostTypeMask; - flags |= Flags.UnknownHostType; - } - } - - if (err != ParsingError.None || (flags & Flags.HostTypeMask) == Flags.UnknownHostType) + if (CheckAuthorityHelper(host, 0, out ParsingError err, ref flags, _syntax.Flags, ref newHost) != host.Length || + err != ParsingError.None) { // Well, custom parser has returned a not known host type, take it as Basic then. _flags = (_flags & ~Flags.HostTypeMask) | Flags.BasicHostType; @@ -3738,38 +3717,34 @@ private static int ParseSchemeCheckImplicitFile(string uriString, ref ParsingErr return UriParser.FindOrFetchAsUnknownV1Syntax(UriHelper.SpanToLowerInvariantString(scheme)); } - // // Checks the syntax of an authority component. It may also get a userInfo if present // Returns an error if no/mailformed authority found // Does not NOT touch _info // Returns position of the Path component // // Must be called in the ctor only - private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, - ref ParsingError err, ref Flags flags, UriParser syntax, ref string? newHost) + private int CheckAuthorityHelper(ReadOnlySpan str, int startOffset, out ParsingError err, ref Flags flags, UriSyntaxFlags syntaxFlags, ref string? newHost) { Debug.Assert((_flags & Flags.Debug_LeftConstructor) == 0 || (!_syntax.IsSimple && Monitor.IsEntered(_info))); + Debug.Assert((_flags & Flags.HasUserInfo) == 0 && (_flags & Flags.HostTypeMask) == 0); + Debug.Assert((uint)startOffset <= (uint)str.Length); - int end = length; - char ch; - int startInput = idx; - int start = idx; + err = ParsingError.None; newHost = null; bool hasUnicode = ((flags & Flags.HasUnicode) != 0); - UriSyntaxFlags syntaxFlags = syntax.Flags; - - Debug.Assert((_flags & Flags.HasUserInfo) == 0 && (_flags & Flags.HostTypeMask) == 0); // need to build new Iri'zed string if (hasUnicode) { - newHost = _originalUnicodeString.Substring(0, startInput); + newHost = _originalUnicodeString.Substring(0, startOffset); } - //Special case is an empty authority - if (idx == length || ((ch = pString[idx]) == '/' || (ch == '\\' && StaticIsFile(syntax)) || ch == '#' || ch == '?')) + char ch; + + // Special case is an empty authority + if ((uint)startOffset >= (uint)str.Length || ((ch = str[startOffset]) == '/' || (ch == '\\' && (syntaxFlags & UriSyntaxFlags.FileLikeUri) != 0) || ch == '#' || ch == '?')) { - if (syntax.InFact(UriSyntaxFlags.AllowEmptyHost)) + if ((syntaxFlags & UriSyntaxFlags.AllowEmptyHost) != 0) { flags &= ~Flags.UncPath; //UNC cannot have an empty hostname if (StaticInFact(flags, Flags.ImplicitFile)) @@ -3780,137 +3755,158 @@ private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, else err = ParsingError.BadHostName; - return idx; + return startOffset; } // Attempt to parse user info first + int i = startOffset; if ((syntaxFlags & UriSyntaxFlags.MayHaveUserInfo) != 0) { - for (; start < end; ++start) + for (; (uint)i < (uint)str.Length; i++) { - if (start == end - 1 || pString[start] == '?' || pString[start] == '#' || pString[start] == '\\' || - pString[start] == '/') + ch = str[i]; + + if ((uint)(i + 1) >= (uint)str.Length || ch == '?' || ch == '#' || ch == '\\' || ch == '/') { - start = idx; + ch = str[startOffset]; + i = startOffset; break; } - else if (pString[start] == '@') + + if (ch == '@') { flags |= Flags.HasUserInfo; // Iri'ze userinfo if (hasUnicode) { - // Normalize user info - newHost += IriHelper.EscapeUnescapeIri(pString, startInput, start + 1, isQuery: false); + newHost += IriHelper.EscapeUnescapeIri(str.Slice(startOffset, i - startOffset + 1), isQuery: false); } - ++start; - ch = pString[start]; + + ch = str[i + 1]; + i++; break; } } } - if (ch == '[' && syntax.InFact(UriSyntaxFlags.AllowIPv6Host) && - IPv6AddressHelper.IsValid(new ReadOnlySpan(pString + start, end - start), out int seqEnd)) + int hostLength = 0; + + if (ch == '[' && (syntaxFlags & UriSyntaxFlags.AllowIPv6Host) != 0 && IPv6AddressHelper.IsValid(str.Slice(i), out hostLength)) { - end = start + seqEnd; - if (end < length && pString[end] is not ('/' or '\\') && (IsImplicitFile || pString[end] is not (':' or '?' or '#'))) + int delimiterIdx = i + hostLength; + + if ((uint)delimiterIdx < (uint)str.Length && + str[delimiterIdx] is not ('/' or '\\') && + (IsImplicitFile || str[delimiterIdx] is not (':' or '?' or '#'))) { // A valid IPv6 address wasn't followed by a valid delimiter (e.g. http://[::]extra). // For implicit files we also disallow ? or #. - flags |= Flags.UnknownHostType; err = ParsingError.BadHostName; - return idx; + return i; } flags |= Flags.IPv6HostType; if (hasUnicode) { - newHost = string.Concat(newHost, new ReadOnlySpan(pString + start, end - start)); + newHost = string.Concat(newHost, str.Slice(i, hostLength)); } } - else if (char.IsAsciiDigit(ch) && syntax.InFact(UriSyntaxFlags.AllowIPv4Host) && - IPv4AddressHelper.IsValid(new ReadOnlySpan(pString + start, end - start), out int endSeq, false, StaticNotAny(flags, Flags.ImplicitFile), syntax.InFact(UriSyntaxFlags.V1_UnknownUri))) + else if (char.IsAsciiDigit(ch) && (syntaxFlags & UriSyntaxFlags.AllowIPv4Host) != 0 && + IPv4AddressHelper.IsValid(str.Slice(i), out hostLength, allowIPv6: false, StaticNotAny(flags, Flags.ImplicitFile), (syntaxFlags & UriSyntaxFlags.V1_UnknownUri) != 0)) { - end = start + endSeq; flags |= Flags.IPv4HostType; if (hasUnicode) { - newHost = string.Concat(newHost, new ReadOnlySpan(pString + start, end - start)); + newHost = string.Concat(newHost, str.Slice(i, hostLength)); } } - else if (((syntaxFlags & UriSyntaxFlags.AllowDnsHost) != 0) && !IriParsingStatic(syntax) && - DomainNameHelper.IsValid(new ReadOnlySpan(pString + start, end - start), iri: false, StaticNotAny(flags, Flags.ImplicitFile), out int domainNameLength)) + else if ((syntaxFlags & (UriSyntaxFlags.AllowDnsHost | UriSyntaxFlags.AllowIriParsing)) == UriSyntaxFlags.AllowDnsHost && + DomainNameHelper.IsValid(str.Slice(i), iri: false, StaticNotAny(flags, Flags.ImplicitFile), out hostLength)) { Debug.Assert(!hasUnicode); - end = start + domainNameLength; - // comes here if there are only ascii chars in host with original parsing and no Iri flags |= Flags.DnsHostType; // Canonical DNS hostnames don't contain uppercase letters - if (!new ReadOnlySpan(pString + start, domainNameLength).ContainsAnyInRange('A', 'Z')) + if (!str.Slice(i, hostLength).ContainsAnyInRange('A', 'Z')) { flags |= Flags.CanonicalDnsHost; } } else if (((syntaxFlags & UriSyntaxFlags.AllowDnsHost) != 0) && - (hasUnicode || syntax.InFact(UriSyntaxFlags.AllowIdn)) && - DomainNameHelper.IsValid(new ReadOnlySpan(pString + start, end - start), iri: true, StaticNotAny(flags, Flags.ImplicitFile), out domainNameLength)) + (hasUnicode || (syntaxFlags & UriSyntaxFlags.AllowIdn) != 0) && + DomainNameHelper.IsValid(str.Slice(i), iri: true, StaticNotAny(flags, Flags.ImplicitFile), out hostLength)) { - end = start + domainNameLength; + flags |= Flags.DnsHostType; + + if (hasUnicode) + { + ReadOnlySpan host = str.Slice(i, hostLength); - CheckAuthorityHelperHandleDnsIri(pString, start, end, hasUnicode, - ref flags, ref newHost, ref err); + if (UriHelper.StripBidiControlCharacters(host, out string? stripped)) + { + host = stripped; + } + + try + { + newHost = UriHelper.NormalizeAndConcat(newHost, host); + } + catch (ArgumentException) + { + err = ParsingError.BadHostName; + } + } } else if ((syntaxFlags & UriSyntaxFlags.AllowUncHost) != 0) { - // // This must remain as the last check before BasicHost type - // - if (UncNameHelper.IsValid(pString, start, ref end, StaticNotAny(flags, Flags.ImplicitFile))) + if (UncNameHelper.IsValid(str.Slice(i), StaticNotAny(flags, Flags.ImplicitFile), out hostLength)) { - if (end - start <= UncNameHelper.MaximumInternetNameLength) + if (hostLength <= UncNameHelper.MaximumInternetNameLength) { flags |= Flags.UncHostType; if (hasUnicode) { - newHost = string.Concat(newHost, new ReadOnlySpan(pString + start, end - start)); + newHost = string.Concat(newHost, str.Slice(i, hostLength)); } } } } + int endOfHost = i + hostLength; + char hostDelimiter = (uint)endOfHost < (uint)str.Length ? str[endOfHost] : '\0'; + // The deal here is that we won't allow '\' host terminator except for the File scheme // If we see '\' we try to make it a part of a Basic host - if (end < length && pString[end] == '\\' && (flags & Flags.HostTypeMask) != Flags.HostNotParsed - && !StaticIsFile(syntax)) + if (hostDelimiter == '\\' && (flags & Flags.HostTypeMask) != Flags.HostNotParsed && (syntaxFlags & UriSyntaxFlags.FileLikeUri) == 0) { - if (syntax.InFact(UriSyntaxFlags.V1_UnknownUri)) + if ((syntaxFlags & UriSyntaxFlags.V1_UnknownUri) != 0) { err = ParsingError.BadHostName; - flags |= Flags.UnknownHostType; - return end; + return endOfHost; } + flags &= ~Flags.HostTypeMask; } // Here we have checked the syntax up to the end of host // The only thing that can cause an exception is the port value // Spend some (duplicated) cycles on that. - else if (end < length && pString[end] == ':') + else if (hostDelimiter == ':') { - if (syntax.InFact(UriSyntaxFlags.MayHavePort)) + if ((syntaxFlags & UriSyntaxFlags.MayHavePort) != 0) { int port = 0; - int startPort = end; - for (idx = end + 1; idx < length; ++idx) + int startPort = endOfHost; + + for (i = endOfHost + 1; (uint)i < (uint)str.Length; i++) { - int val = pString[idx] - '0'; + int val = str[i] - '0'; if ((uint)val <= ('9' - '0')) { if ((port = (port * 10 + val)) > 0xFFFF) @@ -3923,8 +3919,7 @@ private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, else { // The second check is to keep compatibility with V1 until the UriParser is registered - if (syntax.InFact(UriSyntaxFlags.AllowAnyOtherHost) - && syntax.NotAny(UriSyntaxFlags.V1_UnknownUri)) + if ((syntaxFlags & (UriSyntaxFlags.AllowAnyOtherHost | UriSyntaxFlags.V1_UnknownUri)) == UriSyntaxFlags.AllowAnyOtherHost) { flags &= ~Flags.HostTypeMask; break; @@ -3932,27 +3927,28 @@ private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, else { err = ParsingError.BadPort; - return idx; + return i; } } } + // check on 0-ffff range if (port > 0xFFFF) { - if (syntax.InFact(UriSyntaxFlags.AllowAnyOtherHost)) + if ((syntaxFlags & UriSyntaxFlags.AllowAnyOtherHost) != 0) { flags &= ~Flags.HostTypeMask; } else { err = ParsingError.BadPort; - return idx; + return i; } } if (hasUnicode) { - newHost = string.Concat(newHost, new ReadOnlySpan(pString + startPort, idx - startPort)); + newHost = string.Concat(newHost, str.Slice(startPort, i - startPort)); } } else @@ -3966,14 +3962,14 @@ private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, { //No user info for a Basic hostname flags &= ~Flags.HasUserInfo; + // Some schemes do not allow HostType = Basic (plus V1 almost never understands this issue) - // - if (syntax.InFact(UriSyntaxFlags.AllowAnyOtherHost)) + if ((syntaxFlags & UriSyntaxFlags.AllowAnyOtherHost) != 0) { flags |= Flags.BasicHostType; - for (end = idx; end < length; ++end) + for (endOfHost = i; (uint)endOfHost < (uint)str.Length; endOfHost++) { - if (pString[end] == '/' || (pString[end] == '?' || pString[end] == '#')) + if (str[endOfHost] == '/' || str[endOfHost] == '?' || str[endOfHost] == '#') { break; } @@ -3984,7 +3980,7 @@ private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, // Normalize any other host or do idn try { - newHost = UriHelper.NormalizeAndConcat(newHost, new ReadOnlySpan(pString + startInput, end - startInput)); + newHost = UriHelper.NormalizeAndConcat(newHost, str.Slice(i, endOfHost - i)); } catch (ArgumentException) { @@ -3994,20 +3990,18 @@ private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, } else { - // // ATTN V1 compat: V1 supports hostnames like ".." and ".", and so we do but only for unknown schemes. - // - if (syntax.InFact(UriSyntaxFlags.V1_UnknownUri)) + if ((syntaxFlags & UriSyntaxFlags.V1_UnknownUri) != 0) { // Can assert here that the host is not empty so we will set dotFound // at least once or fail before exiting the loop bool dotFound = false; - int startOtherHost = idx; - for (end = idx; end < length; ++end) + for (endOfHost = i; (uint)endOfHost < (uint)str.Length; endOfHost++) { - if (dotFound && (pString[end] == '/' || pString[end] == '?' || pString[end] == '#')) + if (dotFound && (str[endOfHost] == '/' || str[endOfHost] == '?' || str[endOfHost] == '#')) break; - else if (end < (idx + 2) && pString[end] == '.') + + if (endOfHost < (i + 2) && str[endOfHost] == '.') { // allow one or two dots dotFound = true; @@ -4016,10 +4010,10 @@ private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, { //failure err = ParsingError.BadHostName; - flags |= Flags.UnknownHostType; - return idx; + return i; } } + //success flags |= Flags.BasicHostType; @@ -4028,53 +4022,24 @@ private unsafe int CheckAuthorityHelper(char* pString, int idx, int length, // Normalize any other host try { - newHost = UriHelper.NormalizeAndConcat(newHost, new ReadOnlySpan(pString + startOtherHost, end - startOtherHost)); + newHost = UriHelper.NormalizeAndConcat(newHost, str.Slice(i, endOfHost - i)); } catch (ArgumentException) { err = ParsingError.BadFormat; - return idx; + return i; } } } - else if (syntax.InFact(UriSyntaxFlags.MustHaveAuthority) || - (syntax.InFact(UriSyntaxFlags.MailToLikeUri))) + else if ((syntaxFlags & (UriSyntaxFlags.MustHaveAuthority | UriSyntaxFlags.MailToLikeUri)) != 0) { err = ParsingError.BadHostName; - flags |= Flags.UnknownHostType; - return idx; + return i; } } } - return end; - } - - private static unsafe void CheckAuthorityHelperHandleDnsIri(char* pString, int start, int end, - bool hasUnicode, ref Flags flags, - ref string? newHost, ref ParsingError err) - { - // comes here only if host has unicode chars and iri is on or idn is allowed - - flags |= Flags.DnsHostType; - - if (hasUnicode) - { - ReadOnlySpan host = new ReadOnlySpan(pString + start, end - start); - if (UriHelper.StripBidiControlCharacters(host, out string? stripped)) - { - host = stripped; - } - - try - { - newHost = UriHelper.NormalizeAndConcat(newHost, host); - } - catch (ArgumentException) - { - err = ParsingError.BadHostName; - } - } + return endOfHost; } // diff --git a/src/libraries/System.Private.Uri/src/System/UriExt.cs b/src/libraries/System.Private.Uri/src/System/UriExt.cs index 5196d3ce4bfbf7..e5fc3f1532dc57 100644 --- a/src/libraries/System.Private.Uri/src/System/UriExt.cs +++ b/src/libraries/System.Private.Uri/src/System/UriExt.cs @@ -685,12 +685,9 @@ public static bool TryEscapeDataString(ReadOnlySpan charsToEscape, Span front = Buffer.AsSpan(0, PaddingLength); - for (int i = 0; i < front.Length; i++) - { - if (front[i] != PaddingValue) - { - Assert.Fail("Heap corruption detected: unexpected padding value at index: " + i); - } - } - - ReadOnlySpan back = Buffer.AsSpan(PaddingLength + _length); - for (int i = 0; i < back.Length; i++) - { - if (back[i] != PaddingValue) - { - Assert.Fail("Heap corruption detected: unexpected padding value at index: " + (PaddingLength + _length + i)); - } - } + Assert.Equal(result_en_query, IriHelper.EscapeUnescapeIri(uriInput, true)); + Assert.Equal(result_en_nonQuery, IriHelper.EscapeUnescapeIri(uriInput, false)); } } }