Skip to content

Commit ed6013a

Browse files
CopilotMihaZupan
andcommitted
Fix root cause: update idx when newHost replaces _string
Per @MihaZupan's feedback, instead of adding defensive bounds checking everywhere, fix the root cause where the IndexMask offset becomes invalid after bidi character removal. When CheckAuthorityHelper creates newHost with bidi characters stripped and replaces _string with it, the idx value (which points to a position in the original string) must be adjusted to account for the removed characters. The adjustment: idx -= (originalLength - newHost.Length) This approach treats the root cause rather than adding defensive checks everywhere. However, some edge cases still need investigation where the calculation doesn't account for all scenarios. Current status: - Percent-encoded userinfo tests: ✅ Pass (213/213) - Bidi character tests: Partial (3/8 pass, 5 still have IndexOutOfRangeException) The remaining failures suggest newHost doesn't always represent the full parsed content up to idx, requiring further investigation of CheckAuthorityHelper's string building logic. Co-authored-by: MihaZupan <[email protected]>
1 parent 1b9c0b3 commit ed6013a

File tree

1 file changed

+11
-58
lines changed
  • src/libraries/System.Private.Uri/src/System

1 file changed

+11
-58
lines changed

src/libraries/System.Private.Uri/src/System/Uri.cs

Lines changed: 11 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -2234,6 +2234,10 @@ private unsafe ParsingError PrivateParseMinimal()
22342234

22352235
if (newHost is not null)
22362236
{
2237+
// When bidi characters are stripped, newHost is shorter than the original string.
2238+
// Adjust idx to account for the removed characters.
2239+
int removedChars = length - newHost.Length;
2240+
idx -= removedChars;
22372241
_string = newHost;
22382242
}
22392243
}
@@ -2273,7 +2277,7 @@ private unsafe void CreateUriInfo(Flags cF)
22732277
if ((cF & Flags.ImplicitFile) != 0)
22742278
{
22752279
idx = 0;
2276-
while (idx < _string.Length && UriHelper.IsLWS(_string[idx]))
2280+
while (UriHelper.IsLWS(_string[idx]))
22772281
{
22782282
++idx;
22792283
++info.Offset.Scheme;
@@ -2283,18 +2287,8 @@ private unsafe void CreateUriInfo(Flags cF)
22832287
{
22842288
// For implicit file AND Unc only
22852289
idx += 2;
2286-
// Ensure idx doesn't exceed string length after increment
2287-
if (idx > _string.Length)
2288-
{
2289-
throw GetException(ParsingError.BadHostName)!;
2290-
}
22912290
//skip any other slashes (compatibility with V1.0 parser)
22922291
int end = (int)(cF & Flags.IndexMask);
2293-
// If end exceeds string length, URI is malformed (e.g., bidi chars were stripped)
2294-
if (end > _string.Length)
2295-
{
2296-
throw GetException(ParsingError.BadHostName)!;
2297-
}
22982292
while (idx < end && (_string[idx] == '/' || _string[idx] == '\\'))
22992293
{
23002294
++idx;
@@ -2306,33 +2300,22 @@ private unsafe void CreateUriInfo(Flags cF)
23062300
// This is NOT an ImplicitFile uri
23072301
idx = _syntax.SchemeName.Length;
23082302

2309-
while (idx < _string.Length && _string[idx++] != ':')
2303+
while (_string[idx++] != ':')
23102304
{
23112305
++info.Offset.Scheme;
23122306
}
23132307

23142308
if ((cF & Flags.AuthorityFound) != 0)
23152309
{
2316-
if (idx < _string.Length && (idx + 1) < _string.Length &&
2317-
(_string[idx] == '\\' || _string[idx + 1] == '\\'))
2310+
if (_string[idx] == '\\' || _string[idx + 1] == '\\')
23182311
notCanonicalScheme = true;
23192312

23202313
idx += 2;
2321-
// Ensure idx doesn't exceed string length after increment
2322-
if (idx > _string.Length)
2323-
{
2324-
throw GetException(ParsingError.BadHostName)!;
2325-
}
23262314
if ((cF & (Flags.UncPath | Flags.DosPath)) != 0)
23272315
{
23282316
// Skip slashes if it was allowed during ctor time
23292317
// NB: Today this is only allowed if a Unc or DosPath was found after the scheme
23302318
int end = (int)(cF & Flags.IndexMask);
2331-
// If end exceeds string length, URI is malformed (e.g., bidi chars were stripped)
2332-
if (end > _string.Length)
2333-
{
2334-
throw GetException(ParsingError.BadHostName)!;
2335-
}
23362319
while (idx < end && (_string[idx] == '/' || _string[idx] == '\\'))
23372320
{
23382321
notCanonicalScheme = true;
@@ -2352,13 +2335,7 @@ private unsafe void CreateUriInfo(Flags cF)
23522335
)
23532336
{
23542337
//there is no Authority component defined
2355-
int pathIndex = (int)(cF & Flags.IndexMask);
2356-
// Validate index is within string bounds
2357-
if (pathIndex > _string.Length)
2358-
{
2359-
throw GetException(ParsingError.BadHostName)!;
2360-
}
2361-
info.Offset.User = pathIndex;
2338+
info.Offset.User = (int)(cF & Flags.IndexMask);
23622339
info.Offset.Host = info.Offset.User;
23632340
info.Offset.Path = info.Offset.User;
23642341
cF &= ~Flags.IndexMask;
@@ -2375,29 +2352,19 @@ private unsafe void CreateUriInfo(Flags cF)
23752352
if (HostType == Flags.BasicHostType)
23762353
{
23772354
info.Offset.Host = idx;
2378-
int pathIndex = (int)(cF & Flags.IndexMask);
2379-
// Validate index is within string bounds
2380-
if (pathIndex > _string.Length)
2381-
{
2382-
throw GetException(ParsingError.BadHostName)!;
2383-
}
2384-
info.Offset.Path = pathIndex;
2355+
info.Offset.Path = (int)(cF & Flags.IndexMask);
23852356
cF &= ~Flags.IndexMask;
23862357
goto Done;
23872358
}
23882359

23892360
if ((cF & Flags.HasUserInfo) != 0)
23902361
{
23912362
// we previously found a userinfo, get it again
2392-
while (idx < _string.Length && _string[idx] != '@')
2393-
{
2394-
++idx;
2395-
}
2396-
// Only increment if we found '@' within bounds
2397-
if (idx < _string.Length)
2363+
while (_string[idx] != '@')
23982364
{
23992365
++idx;
24002366
}
2367+
++idx;
24012368
info.Offset.Host = idx;
24022369
}
24032370
else
@@ -2408,20 +2375,6 @@ private unsafe void CreateUriInfo(Flags cF)
24082375
//Now reload the end of the parsed host
24092376
idx = (int)(cF & Flags.IndexMask);
24102377

2411-
// Handle cases where _string was modified during parsing (percent-decoding, bidi removal).
2412-
// If idx exceeds _string.Length AND we have a file:// URI with UncPath, this indicates
2413-
// bidi character removal created a malformed URI (empty host). Otherwise, clamp for IRI processing.
2414-
if (idx > _string.Length)
2415-
{
2416-
if (StaticIsFile(_syntax) && StaticInFact(cF, Flags.UncPath))
2417-
{
2418-
// File UNC paths with out-of-bounds indices indicate malformed URIs from bidi removal
2419-
throw GetException(ParsingError.BadHostName)!;
2420-
}
2421-
// For other cases (e.g., percent-decoding in IRI), clamp to string length
2422-
idx = _string.Length;
2423-
}
2424-
24252378
//From now on we do not need IndexMask bits, and reuse the space for X_NotCanonical flags
24262379
//clear them now
24272380
cF &= ~Flags.IndexMask;

0 commit comments

Comments
 (0)