diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index 8d49e71e3b90aa..6abfc4a1509d5d 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -521,12 +521,10 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) } else { - // The inner expression found an empty match, so we'll go directly to 'back2' if we - // backtrack. In this case, we need to push something on the stack, since back2 pops. - // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text - // position associated with that empty match. - StackPush(oldMarkPos); - TrackPush2(StackPeek()); // Save old mark + // The inner expression found an empty match. We'll go directly to BacktrackingSecond + // if we backtrack. We do not touch the grouping stack here... instead, we record the + // old mark and a "no-stack-pop" flag (0) on the backtracking stack. + TrackPush2(oldMarkPos, 0); } } advance = 1; @@ -541,7 +539,11 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) // fails, we go to Lazybranchmark | RegexOpcode.Back2 TrackPop(2); int pos = TrackPeek(1); - TrackPush2(TrackPeek()); // Save old mark + + // Store the previous mark. The second value (1) flags that a grouping stack frame + // must be popped when backtracking, because a new frame will be pushed next. + TrackPush2(TrackPeek(), 1); + StackPush(pos); // Make new mark runtextpos = pos; // Recall position Goto(Operand(0)); // Loop @@ -551,9 +553,17 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) case RegexOpcode.Lazybranchmark | RegexOpcode.BacktrackingSecond: // The lazy loop has failed. We'll do a true backtrack and // start over before the lazy loop. - StackPop(); - TrackPop(); - StackPush(TrackPeek()); // Recall old mark + int needsPop = runtrack![runtrackpos]; // flag: 0 or 1 + int oldMark = runtrack[runtrackpos + 1]; // saved old mark + runtrackpos += 2; // consume both payload ints + if (needsPop != 0) + { + // We pushed on the grouping stack in the Backtracking arm; balance it now. + StackPop(); + } + + // Restore the old mark and backtrack + StackPush(oldMark); break; case RegexOpcode.Setcount: diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 01553e0659b850..f7f60f1e1ecb04 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -329,6 +329,26 @@ public static IEnumerable Match_MemberData() yield return (@"(b|a|aa)((?:aa)+?)+?$", "aaaaaaaa", RegexOptions.None, 0, 8, true, "aaaaaaaa"); yield return (@"(|a|aa)(((?:aa)+?)+?|aaaaab)\w$", "aaaaaabc", RegexOptions.None, 0, 8, true, "aaaaaabc"); + // Lazy loops with empty matches + if (!PlatformDetection.IsNetFramework) + { + // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/111051 + yield return (@"(.)+()+?b", "xyzb", RegexOptions.None, 0, 4, true, "xyzb"); + yield return (@"^(.)+()+?b", "xyzb", RegexOptions.None, 0, 4, true, "xyzb"); + + if (!RegexHelpers.IsNonBacktracking(engine)) + { + // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/58786 + yield return (@"(?(-*)+?-*)$", "abc", RegexOptions.None, 0, 3, true, ""); + + // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/63385 + yield return (@"(^+?)?()", "1", RegexOptions.None, 0, 1, true, ""); + } + } + // Nested loops yield return (@"(abcd*)+e", "abcde", RegexOptions.None, 0, 5, true, "abcde"); yield return (@"(abcd*?)+e", "abcde", RegexOptions.None, 0, 5, true, "abcde"); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs index ab5f20f413104d..cf3890fede1fe2 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs @@ -452,9 +452,18 @@ public static IEnumerable Matches_TestData() new CaptureData("x", 3, 1), } }; + + // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/111051 + yield return new object[] + { + engine, @"anyexpress1(?<=(.(any express|(any express)*)+?)anyexpress1)", "anystring anyexpress1", RegexOptions.None, new[] + { + new CaptureData("anyexpress1", 10, 11), + } + }; } - // Fails on .NET Framework: [ActiveIssue("https://github.com/dotnet/runtime/issues/62094")] + // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/62094 yield return new object[] { engine, @"(?:){93}", "x", RegexOptions.None, new[] @@ -463,6 +472,16 @@ public static IEnumerable Matches_TestData() new CaptureData("", 1, 0) } }; + + // Fails on .NET Framework: https://github.com/dotnet/runtime/issues/43314 + yield return new object[] + { + engine, @"(?:(?:0?)+?(?:a?)+?)?", "0a", RegexOptions.None, new[] + { + new CaptureData("0a", 0, 2), + new CaptureData("", 2, 0), + } + }; #endif } }