diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 1d23aa697f11fe..8ac987b5074718 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -954,6 +954,20 @@ bool EmitAnchors() switch (regexTree.FindOptimizations.FindMode) { case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning: + // If we also have a trailing End anchor with fixed length, we can check for exact length match. + // Compute this lazily to avoid overhead in the interpreter. + if (RegexPrefixAnalyzer.FindTrailingAnchor(regexTree.Root) == RegexNodeKind.End && + regexTree.Root.ComputeMaxLength() == regexTree.FindOptimizations.MinRequiredLength) + { + int minRequiredLength = regexTree.FindOptimizations.MinRequiredLength; + writer.WriteLine($"// The pattern leads with a beginning (\\A) anchor and has a trailing end (\\z) anchor, and any possible match is exactly {minRequiredLength} characters."); + using (EmitBlock(writer, $"if (pos == 0 && inputSpan.Length == {minRequiredLength})")) + { + writer.WriteLine("return true;"); + } + return true; + } + writer.WriteLine("// The pattern leads with a beginning (\\A) anchor."); using (EmitBlock(writer, "if (pos == 0)")) { diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 3d34b33d183ea8..3841d7a02b570e 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -507,6 +507,25 @@ bool EmitAnchors() switch (_regexTree.FindOptimizations.FindMode) { case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning: + // If we also have a trailing End anchor with fixed length, we can check for exact length match. + // Compute this lazily to avoid overhead in the interpreter. + if (RegexPrefixAnalyzer.FindTrailingAnchor(_regexTree.Root) == RegexNodeKind.End && + _regexTree.Root.ComputeMaxLength() == _regexTree.FindOptimizations.MinRequiredLength) + { + // if (pos != 0 || inputSpan.Length != minRequiredLength) goto returnFalse; + // return true; + Ldloc(pos); + Ldc(0); + Bne(returnFalse); + Ldloca(inputSpan); + Call(SpanGetLengthMethod); + Ldc(_regexTree.FindOptimizations.MinRequiredLength); + Bne(returnFalse); + Ldc(1); + Ret(); + return true; + } + // if (pos != 0) goto returnFalse; // return true; Ldloc(pos); diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 2510769b514827..6539dd82fdcf82 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -1119,6 +1119,40 @@ public static IEnumerable Match_MemberData() yield return (@"a?(\b|c)", "ac", RegexOptions.None, 0, 2, true, "ac"); yield return (@"(a|())*(\b|c)", "ac", RegexOptions.None, 0, 2, true, "ac"); yield return (@"(\b|a)*", "a", RegexOptions.None, 0, 1, true, ""); + + // Tests for patterns with both beginning and end anchors with fixed length (optimization for early fail-fast) + yield return (@"^1234\z", "1234", RegexOptions.None, 0, 4, true, "1234"); + yield return (@"^1234\z", "12345", RegexOptions.None, 0, 5, false, ""); + yield return (@"^1234\z", "123", RegexOptions.None, 0, 3, false, ""); + yield return (@"^1234\z", "x1234", RegexOptions.None, 0, 5, false, ""); + yield return (@"\Aabc\z", "abc", RegexOptions.None, 0, 3, true, "abc"); + yield return (@"\Aabc\z", "abcd", RegexOptions.None, 0, 4, false, ""); + + // Test variations without starting anchor (should not trigger optimization) + yield return (@"1234\z", "1234", RegexOptions.None, 0, 4, true, "1234"); + yield return (@"1234\z", "x1234", RegexOptions.None, 0, 5, true, "1234"); + + // Test with ^ anchor but with Multiline (should not trigger optimization as ^ matches line start in Multiline) + yield return (@"^1234\z", "1234", RegexOptions.Multiline, 0, 4, true, "1234"); + yield return (@"^1234\z", "x\n1234", RegexOptions.Multiline, 0, 6, true, "1234"); + + // Test with \Z anchor (allows optional \n, so should not trigger optimization) + yield return (@"^1234\Z", "1234", RegexOptions.None, 0, 4, true, "1234"); + yield return (@"^1234\Z", "1234\n", RegexOptions.None, 0, 5, true, "1234"); + yield return (@"^1234\Z", "12345", RegexOptions.None, 0, 5, false, ""); + + // Test with $ anchor (should not trigger optimization as $ allows optional \n) + yield return (@"^1234$", "1234", RegexOptions.None, 0, 4, true, "1234"); + yield return (@"^1234$", "1234\n", RegexOptions.None, 0, 5, true, "1234"); + yield return (@"^1234$", "12345", RegexOptions.None, 0, 5, false, ""); + + // Test with something before the ^ starting anchor + yield return (@"x^1234\z", "1234", RegexOptions.None, 0, 4, false, ""); + yield return (@"x^1234\z", "x1234", RegexOptions.None, 0, 5, false, ""); + + // Test with something after the \z trailing anchor + yield return (@"^1234\zx", "1234", RegexOptions.None, 0, 4, false, ""); + yield return (@"^1234\zx", "1234x", RegexOptions.None, 0, 5, false, ""); } [OuterLoop("Takes several seconds to run")]