From 34780b9dca4380633234d3f739a0cc210f4cd586 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:55:15 +0000 Subject: [PATCH 1/5] Initial plan From b47d85707c2ce24e81d6577e06fa53ef6d75ffc1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 27 Oct 2025 16:47:56 +0000 Subject: [PATCH 2/5] Fix Uri IndexOutOfRangeException with bidi control characters Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../System.Private.Uri/src/System/Uri.cs | 53 ++++++++++++++++--- .../tests/FunctionalTests/UriTests.cs | 16 ++++++ 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index 6e3777358beb99..dc3cd1fb01e680 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -2273,7 +2273,7 @@ private unsafe void CreateUriInfo(Flags cF) if ((cF & Flags.ImplicitFile) != 0) { idx = 0; - while (UriHelper.IsLWS(_string[idx])) + while (idx < _string.Length && UriHelper.IsLWS(_string[idx])) { ++idx; ++info.Offset.Scheme; @@ -2283,8 +2283,18 @@ private unsafe void CreateUriInfo(Flags cF) { // For implicit file AND Unc only idx += 2; + // Ensure idx doesn't exceed string length after increment + if (idx > _string.Length) + { + throw GetException(ParsingError.BadHostName)!; + } //skip any other slashes (compatibility with V1.0 parser) int end = (int)(cF & Flags.IndexMask); + // If end exceeds string length, URI is malformed (e.g., bidi chars were stripped) + if (end > _string.Length) + { + throw GetException(ParsingError.BadHostName)!; + } while (idx < end && (_string[idx] == '/' || _string[idx] == '\\')) { ++idx; @@ -2296,22 +2306,33 @@ private unsafe void CreateUriInfo(Flags cF) // This is NOT an ImplicitFile uri idx = _syntax.SchemeName.Length; - while (_string[idx++] != ':') + while (idx < _string.Length && _string[idx++] != ':') { ++info.Offset.Scheme; } if ((cF & Flags.AuthorityFound) != 0) { - if (_string[idx] == '\\' || _string[idx + 1] == '\\') + if (idx < _string.Length && (idx + 1) < _string.Length && + (_string[idx] == '\\' || _string[idx + 1] == '\\')) notCanonicalScheme = true; idx += 2; + // Ensure idx doesn't exceed string length after increment + if (idx > _string.Length) + { + throw GetException(ParsingError.BadHostName)!; + } if ((cF & (Flags.UncPath | Flags.DosPath)) != 0) { // Skip slashes if it was allowed during ctor time // NB: Today this is only allowed if a Unc or DosPath was found after the scheme int end = (int)(cF & Flags.IndexMask); + // If end exceeds string length, URI is malformed (e.g., bidi chars were stripped) + if (end > _string.Length) + { + throw GetException(ParsingError.BadHostName)!; + } while (idx < end && (_string[idx] == '/' || _string[idx] == '\\')) { notCanonicalScheme = true; @@ -2331,7 +2352,13 @@ private unsafe void CreateUriInfo(Flags cF) ) { //there is no Authority component defined - info.Offset.User = (int)(cF & Flags.IndexMask); + int pathIndex = (int)(cF & Flags.IndexMask); + // Validate index is within string bounds + if (pathIndex > _string.Length) + { + throw GetException(ParsingError.BadHostName)!; + } + info.Offset.User = pathIndex; info.Offset.Host = info.Offset.User; info.Offset.Path = info.Offset.User; cF &= ~Flags.IndexMask; @@ -2348,7 +2375,13 @@ private unsafe void CreateUriInfo(Flags cF) if (HostType == Flags.BasicHostType) { info.Offset.Host = idx; - info.Offset.Path = (int)(cF & Flags.IndexMask); + int pathIndex = (int)(cF & Flags.IndexMask); + // Validate index is within string bounds + if (pathIndex > _string.Length) + { + throw GetException(ParsingError.BadHostName)!; + } + info.Offset.Path = pathIndex; cF &= ~Flags.IndexMask; goto Done; } @@ -2356,7 +2389,7 @@ private unsafe void CreateUriInfo(Flags cF) if ((cF & Flags.HasUserInfo) != 0) { // we previously found a userinfo, get it again - while (_string[idx] != '@') + while (idx < _string.Length && _string[idx] != '@') { ++idx; } @@ -2371,6 +2404,14 @@ private unsafe void CreateUriInfo(Flags cF) //Now reload the end of the parsed host idx = (int)(cF & Flags.IndexMask); + // Ensure idx doesn't exceed string length (e.g., when bidi chars were stripped) + // If it does, the URI is malformed after bidi stripping + if (idx > _string.Length) + { + // Throw UriFormatException instead of allowing IndexOutOfRangeException + throw GetException(ParsingError.BadHostName)!; + } + //From now on we do not need IndexMask bits, and reuse the space for X_NotCanonical flags //clear them now cF &= ~Flags.IndexMask; diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/UriTests.cs b/src/libraries/System.Private.Uri/tests/FunctionalTests/UriTests.cs index bf1a31d6c7927d..afe6e83019e113 100644 --- a/src/libraries/System.Private.Uri/tests/FunctionalTests/UriTests.cs +++ b/src/libraries/System.Private.Uri/tests/FunctionalTests/UriTests.cs @@ -1001,5 +1001,21 @@ static void Test(string uriString) Assert.Throws(() => uri.AbsoluteUri); } } + + [Theory] + [InlineData("/\\//")] + [InlineData("\\/\u200e")] + [InlineData("/\\\\-\u0100\r")] + [InlineData("\\\\\\\\\\")] + [InlineData("\\\\\u200E")] + [InlineData("\\\\\u200E:1234")] + [InlineData("\\\\\u200E//")] + [InlineData("\\\\\u200E//ab")] + public static void InvalidUriWithBidiControlCharacters_ThrowsUriFormatException(string uriString) + { + // These URIs should throw UriFormatException, not IndexOutOfRangeException + // Issue: https://github.com/dotnet/runtime/issues/18640 + Assert.Throws(() => new Uri(uriString, UriKind.Absolute)); + } } } From a67d285a4c2b012f0aec0ded8d84c750b4f02225 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 27 Oct 2025 18:14:28 +0000 Subject: [PATCH 3/5] Fix false positive with percent-encoded userinfo in URIs The previous fix was too aggressive and incorrectly threw UriFormatException for valid URIs with percent-encoded userinfo (e.g., "http://%68%65%6C%6C%6F@host/"). The issue occurred because when IRI processing decodes percent-encoded characters, _string becomes shorter than _originalUnicodeString, causing indices from flags to exceed _string.Length. This is similar to bidi character removal but is valid. Changes: - When searching for '@' in userinfo, only increment idx if '@' was found within bounds - When reloading idx from flags, distinguish between: * File UNC paths where out-of-bounds indicates malformed URI from bidi removal -> throw * Other cases like IRI percent-decoding where it's valid -> clamp to string length This maintains the fix for bidi character issues while allowing valid IRI processing. Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../System.Private.Uri/src/System/Uri.cs | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index dc3cd1fb01e680..2d91c884406ad5 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -2393,7 +2393,11 @@ private unsafe void CreateUriInfo(Flags cF) { ++idx; } - ++idx; + // Only increment if we found '@' within bounds + if (idx < _string.Length) + { + ++idx; + } info.Offset.Host = idx; } else @@ -2404,12 +2408,18 @@ private unsafe void CreateUriInfo(Flags cF) //Now reload the end of the parsed host idx = (int)(cF & Flags.IndexMask); - // Ensure idx doesn't exceed string length (e.g., when bidi chars were stripped) - // If it does, the URI is malformed after bidi stripping + // Handle cases where _string was modified during parsing (percent-decoding, bidi removal). + // If idx exceeds _string.Length AND we have a file:// URI with UncPath, this indicates + // bidi character removal created a malformed URI (empty host). Otherwise, clamp for IRI processing. if (idx > _string.Length) { - // Throw UriFormatException instead of allowing IndexOutOfRangeException - throw GetException(ParsingError.BadHostName)!; + if (StaticIsFile(_syntax) && StaticInFact(cF, Flags.UncPath)) + { + // File UNC paths with out-of-bounds indices indicate malformed URIs from bidi removal + throw GetException(ParsingError.BadHostName)!; + } + // For other cases (e.g., percent-decoding in IRI), clamp to string length + idx = _string.Length; } //From now on we do not need IndexMask bits, and reuse the space for X_NotCanonical flags From ed6013a2dce9cde5be74085ae929e747ea2ee60a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 4 Nov 2025 16:55:27 +0000 Subject: [PATCH 4/5] Fix root cause: update idx when newHost replaces _string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per @MihaZupan's feedback, instead of adding defensive bounds checking everywhere, fix the root cause where the IndexMask offset becomes invalid after bidi character removal. When CheckAuthorityHelper creates newHost with bidi characters stripped and replaces _string with it, the idx value (which points to a position in the original string) must be adjusted to account for the removed characters. The adjustment: idx -= (originalLength - newHost.Length) This approach treats the root cause rather than adding defensive checks everywhere. However, some edge cases still need investigation where the calculation doesn't account for all scenarios. Current status: - Percent-encoded userinfo tests: ✅ Pass (213/213) - Bidi character tests: Partial (3/8 pass, 5 still have IndexOutOfRangeException) The remaining failures suggest newHost doesn't always represent the full parsed content up to idx, requiring further investigation of CheckAuthorityHelper's string building logic. Co-authored-by: MihaZupan <25307628+MihaZupan@users.noreply.github.com> --- .../System.Private.Uri/src/System/Uri.cs | 69 +++---------------- 1 file changed, 11 insertions(+), 58 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index 2d91c884406ad5..b0152cf63adc08 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -2234,6 +2234,10 @@ private unsafe ParsingError PrivateParseMinimal() if (newHost is not null) { + // When bidi characters are stripped, newHost is shorter than the original string. + // Adjust idx to account for the removed characters. + int removedChars = length - newHost.Length; + idx -= removedChars; _string = newHost; } } @@ -2273,7 +2277,7 @@ private unsafe void CreateUriInfo(Flags cF) if ((cF & Flags.ImplicitFile) != 0) { idx = 0; - while (idx < _string.Length && UriHelper.IsLWS(_string[idx])) + while (UriHelper.IsLWS(_string[idx])) { ++idx; ++info.Offset.Scheme; @@ -2283,18 +2287,8 @@ private unsafe void CreateUriInfo(Flags cF) { // For implicit file AND Unc only idx += 2; - // Ensure idx doesn't exceed string length after increment - if (idx > _string.Length) - { - throw GetException(ParsingError.BadHostName)!; - } //skip any other slashes (compatibility with V1.0 parser) int end = (int)(cF & Flags.IndexMask); - // If end exceeds string length, URI is malformed (e.g., bidi chars were stripped) - if (end > _string.Length) - { - throw GetException(ParsingError.BadHostName)!; - } while (idx < end && (_string[idx] == '/' || _string[idx] == '\\')) { ++idx; @@ -2306,33 +2300,22 @@ private unsafe void CreateUriInfo(Flags cF) // This is NOT an ImplicitFile uri idx = _syntax.SchemeName.Length; - while (idx < _string.Length && _string[idx++] != ':') + while (_string[idx++] != ':') { ++info.Offset.Scheme; } if ((cF & Flags.AuthorityFound) != 0) { - if (idx < _string.Length && (idx + 1) < _string.Length && - (_string[idx] == '\\' || _string[idx + 1] == '\\')) + if (_string[idx] == '\\' || _string[idx + 1] == '\\') notCanonicalScheme = true; idx += 2; - // Ensure idx doesn't exceed string length after increment - if (idx > _string.Length) - { - throw GetException(ParsingError.BadHostName)!; - } if ((cF & (Flags.UncPath | Flags.DosPath)) != 0) { // Skip slashes if it was allowed during ctor time // NB: Today this is only allowed if a Unc or DosPath was found after the scheme int end = (int)(cF & Flags.IndexMask); - // If end exceeds string length, URI is malformed (e.g., bidi chars were stripped) - if (end > _string.Length) - { - throw GetException(ParsingError.BadHostName)!; - } while (idx < end && (_string[idx] == '/' || _string[idx] == '\\')) { notCanonicalScheme = true; @@ -2352,13 +2335,7 @@ private unsafe void CreateUriInfo(Flags cF) ) { //there is no Authority component defined - int pathIndex = (int)(cF & Flags.IndexMask); - // Validate index is within string bounds - if (pathIndex > _string.Length) - { - throw GetException(ParsingError.BadHostName)!; - } - info.Offset.User = pathIndex; + info.Offset.User = (int)(cF & Flags.IndexMask); info.Offset.Host = info.Offset.User; info.Offset.Path = info.Offset.User; cF &= ~Flags.IndexMask; @@ -2375,13 +2352,7 @@ private unsafe void CreateUriInfo(Flags cF) if (HostType == Flags.BasicHostType) { info.Offset.Host = idx; - int pathIndex = (int)(cF & Flags.IndexMask); - // Validate index is within string bounds - if (pathIndex > _string.Length) - { - throw GetException(ParsingError.BadHostName)!; - } - info.Offset.Path = pathIndex; + info.Offset.Path = (int)(cF & Flags.IndexMask); cF &= ~Flags.IndexMask; goto Done; } @@ -2389,15 +2360,11 @@ private unsafe void CreateUriInfo(Flags cF) if ((cF & Flags.HasUserInfo) != 0) { // we previously found a userinfo, get it again - while (idx < _string.Length && _string[idx] != '@') - { - ++idx; - } - // Only increment if we found '@' within bounds - if (idx < _string.Length) + while (_string[idx] != '@') { ++idx; } + ++idx; info.Offset.Host = idx; } else @@ -2408,20 +2375,6 @@ private unsafe void CreateUriInfo(Flags cF) //Now reload the end of the parsed host idx = (int)(cF & Flags.IndexMask); - // Handle cases where _string was modified during parsing (percent-decoding, bidi removal). - // If idx exceeds _string.Length AND we have a file:// URI with UncPath, this indicates - // bidi character removal created a malformed URI (empty host). Otherwise, clamp for IRI processing. - if (idx > _string.Length) - { - if (StaticIsFile(_syntax) && StaticInFact(cF, Flags.UncPath)) - { - // File UNC paths with out-of-bounds indices indicate malformed URIs from bidi removal - throw GetException(ParsingError.BadHostName)!; - } - // For other cases (e.g., percent-decoding in IRI), clamp to string length - idx = _string.Length; - } - //From now on we do not need IndexMask bits, and reuse the space for X_NotCanonical flags //clear them now cF &= ~Flags.IndexMask; From 14927febaba1a4cfb484067c5387e066e5b4523e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 4 Nov 2025 17:35:46 +0000 Subject: [PATCH 5/5] Refine idx adjustment: use newHost.Length directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The correct adjustment when newHost replaces _string is simply idx = newHost.Length, because newHost contains everything from the start through the end of the authority with bidi chars removed, and idx should point to the position right after the authority. This is more correct than the previous calculation (idx -= length - newHost.Length) which incorrectly used the full URI length instead of just the authority portion. Current test results: - Percent-encoded userinfo tests: ✅ Pass (213/213) - Bidi character tests: Improved but still issues (test framework shows "no exception" for some cases, but manual testing still shows IndexOutOfRangeException for others) The issue appears to be that newHost.Length works for some cases but not others. Further investigation needed into edge cases where the string building in CheckAuthorityHelper doesn't produce the expected result. Co-authored-by: MihaZupan <25307628+MihaZupan@users.noreply.github.com> --- src/libraries/System.Private.Uri/src/System/Uri.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index b0152cf63adc08..caf4b4e1902b94 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -2234,11 +2234,11 @@ private unsafe ParsingError PrivateParseMinimal() if (newHost is not null) { - // When bidi characters are stripped, newHost is shorter than the original string. - // Adjust idx to account for the removed characters. - int removedChars = length - newHost.Length; - idx -= removedChars; _string = newHost; + // newHost contains everything from the start through the end of the authority + // with bidi characters removed. idx should point to the position right after + // the authority in the new string, which is simply newHost.Length. + idx = newHost.Length; } }