forked from MihaZupan/runtime-utils
-
Couldn't load subscription status.
- Fork 0
Open
Description
Job completed in 18 minutes 58 seconds (remote runner delay: 1 minute 15 seconds).
dotnet/runtime#117892
Using arguments: regexdiff
16 out of 18857 patterns have generated source code changes.
Examples of GeneratedRegex source diffs
"\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9] ..." (293 uses)
[GeneratedRegex("\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\b", RegexOptions.CultureInvariant)]
/// ○ Match a character in the set [0-9].<br/>
/// ○ Match a sequence of expressions.<br/>
/// ○ Match a character in the set [01] greedily, optionally.<br/>
- /// ○ Match a character in the set [0-9] greedily at least 1 and at most 2 times.<br/>
+ /// ○ Match a character in the set [0-9] atomically at least 1 and at most 2 times.<br/>
/// ○ Match if at a word boundary.<br/>
/// </code>
/// </remarks>
int alternation_starting_pos1 = 0;
int charloop_starting_pos = 0, charloop_ending_pos = 0;
int charloop_starting_pos1 = 0, charloop_ending_pos1 = 0;
- int charloop_starting_pos2 = 0, charloop_ending_pos2 = 0;
int loop_iteration = 0;
int stackpos = 0;
int startingStackpos = 0;
CharLoopEnd1:
//}
- // Match a character in the set [0-9] greedily at least 1 and at most 2 times.
- //{
- charloop_starting_pos2 = pos;
-
+ // Match a character in the set [0-9] atomically at least 1 and at most 2 times.
+ {
int iteration1 = 0;
while (iteration1 < 2 && (uint)iteration1 < (uint)slice.Length && char.IsAsciiDigit(slice[iteration1]))
{
slice = slice.Slice(iteration1);
pos += iteration1;
-
- charloop_ending_pos2 = pos;
- charloop_starting_pos2++;
- goto CharLoopEnd2;
-
- CharLoopBacktrack2:
-
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- if (charloop_starting_pos2 >= charloop_ending_pos2)
- {
- goto CharLoopBacktrack1;
- }
- pos = --charloop_ending_pos2;
- slice = inputSpan.Slice(pos);
-
- CharLoopEnd2:
- //}
+ }
alternation_branch = 1;
goto AlternationMatch1;
case 0:
goto AlternationBranch1;
case 1:
- goto CharLoopBacktrack2;
+ goto CharLoopBacktrack1;
}
AlternationMatch1:;
"\\b[\\w\\d\\.\\-]+\\@[\\w\\d\\.\\-]+\\.[a-z] ..." (267 uses)
[GeneratedRegex("\\b[\\w\\d\\.\\-]+\\@[\\w\\d\\.\\-]+\\.[a-z]{2,6}\\b")]
/// ○ Match '@'.<br/>
/// ○ Match a character in the set [-.\w\d] greedily at least once.<br/>
/// ○ Match '.'.<br/>
- /// ○ Match a character in the set [a-z] greedily at least 2 and at most 6 times.<br/>
+ /// ○ Match a character in the set [a-z] atomically at least 2 and at most 6 times.<br/>
/// ○ Match if at a word boundary.<br/>
/// </code>
/// </remarks>
int matchStart = pos;
char ch;
int charloop_starting_pos = 0, charloop_ending_pos = 0;
- int charloop_starting_pos1 = 0, charloop_ending_pos1 = 0;
ReadOnlySpan<char> slice = inputSpan.Slice(pos);
// Match if at a word boundary.
goto CharLoopBacktrack;
}
- // Match a character in the set [a-z] greedily at least 2 and at most 6 times.
- //{
+ // Match a character in the set [a-z] atomically at least 2 and at most 6 times.
+ {
pos++;
slice = inputSpan.Slice(pos);
- charloop_starting_pos1 = pos;
-
int iteration2 = 0;
while (iteration2 < 6 && (uint)iteration2 < (uint)slice.Length && char.IsAsciiLetterLower(slice[iteration2]))
{
slice = slice.Slice(iteration2);
pos += iteration2;
-
- charloop_ending_pos1 = pos;
- charloop_starting_pos1 += 2;
- goto CharLoopEnd1;
-
- CharLoopBacktrack1:
-
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- if (charloop_starting_pos1 >= charloop_ending_pos1)
- {
- goto CharLoopBacktrack;
- }
- pos = --charloop_ending_pos1;
- slice = inputSpan.Slice(pos);
-
- CharLoopEnd1:
- //}
+ }
// Match if at a word boundary.
if (!Utilities.IsBoundary(inputSpan, pos))
{
- goto CharLoopBacktrack1;
+ goto CharLoopBacktrack;
}
// The input matched.
"\\A\\b[0-9a-f]+\\b\\Z" (178 uses)
[GeneratedRegex("\\A\\b[0-9a-f]+\\b\\Z")]
/// <code>
/// ○ Match if at the beginning of the string.<br/>
/// ○ Match if at a word boundary.<br/>
- /// ○ Match a character in the set [0-9a-f] greedily at least once.<br/>
+ /// ○ Match a character in the set [0-9a-f] atomically at least once.<br/>
/// ○ Match if at a word boundary.<br/>
/// ○ Match if at the end of the string or if before an ending newline.<br/>
/// </code>
{
int pos = base.runtextpos;
int matchStart = pos;
- int charloop_starting_pos = 0, charloop_ending_pos = 0;
ReadOnlySpan<char> slice = inputSpan.Slice(pos);
// Match if at the beginning of the string.
return false; // The input didn't match.
}
- // Match a character in the set [0-9a-f] greedily at least once.
- //{
- charloop_starting_pos = pos;
-
+ // Match a character in the set [0-9a-f] atomically at least once.
+ {
int iteration = slice.IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower);
if (iteration < 0)
{
slice = slice.Slice(iteration);
pos += iteration;
-
- charloop_ending_pos = pos;
- charloop_starting_pos++;
- goto CharLoopEnd;
-
- CharLoopBacktrack:
-
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- if (charloop_starting_pos >= charloop_ending_pos)
- {
- return false; // The input didn't match.
- }
- pos = --charloop_ending_pos;
- slice = inputSpan.Slice(pos);
-
- CharLoopEnd:
- //}
+ }
// Match if at a word boundary.
if (!Utilities.IsBoundary(inputSpan, pos))
{
- goto CharLoopBacktrack;
+ return false; // The input didn't match.
}
// Match if at the end of the string or if before an ending newline.
if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
{
- goto CharLoopBacktrack;
+ return false; // The input didn't match.
}
// The input matched.
"\\A\\p{Lu}{2,}\\b" (117 uses)
[GeneratedRegex("\\A\\p{Lu}{2,}\\b")]
/// Explanation:<br/>
/// <code>
/// ○ Match if at the beginning of the string.<br/>
- /// ○ Match a character in the set [\p{Lu}] greedily at least twice.<br/>
+ /// ○ Match a character in the set [\p{Lu}] atomically at least twice.<br/>
/// ○ Match if at a word boundary.<br/>
/// </code>
/// </remarks>
{
int pos = base.runtextpos;
int matchStart = pos;
- int charloop_starting_pos = 0, charloop_ending_pos = 0;
ReadOnlySpan<char> slice = inputSpan.Slice(pos);
// Match if at the beginning of the string.
return false; // The input didn't match.
}
- // Match a character in the set [\p{Lu}] greedily at least twice.
- //{
- charloop_starting_pos = pos;
-
+ // Match a character in the set [\p{Lu}] atomically at least twice.
+ {
int iteration = 0;
while ((uint)iteration < (uint)slice.Length && char.IsUpper(slice[iteration]))
{
slice = slice.Slice(iteration);
pos += iteration;
-
- charloop_ending_pos = pos;
- charloop_starting_pos += 2;
- goto CharLoopEnd;
-
- CharLoopBacktrack:
-
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- if (charloop_starting_pos >= charloop_ending_pos)
- {
- return false; // The input didn't match.
- }
- pos = --charloop_ending_pos;
- slice = inputSpan.Slice(pos);
-
- CharLoopEnd:
- //}
+ }
// Match if at a word boundary.
if (!Utilities.IsBoundary(inputSpan, pos))
{
- goto CharLoopBacktrack;
+ return false; // The input didn't match.
}
// The input matched.
"(\\d+)|(\\b([MDCLXVI]+)\\b)" (112 uses)
[GeneratedRegex("(\\d+)|(\\b([MDCLXVI]+)\\b)", RegexOptions.IgnoreCase)]
/// ○ 2nd capture group.<br/>
/// ○ Match if at a word boundary.<br/>
/// ○ 3rd capture group.<br/>
- /// ○ Match a character in the set [CDILMVXcdilmvx] greedily at least once.<br/>
+ /// ○ Match a character in the set [CDILMVXcdilmvx] atomically at least once.<br/>
/// ○ Match if at a word boundary.<br/>
/// </code>
/// </remarks>
int capture_starting_pos = 0;
int capture_starting_pos1 = 0;
int capture_starting_pos2 = 0;
- int charloop_capture_pos = 0;
- int charloop_starting_pos = 0, charloop_ending_pos = 0;
- int stackpos = 0;
ReadOnlySpan<char> slice = inputSpan.Slice(pos);
- // Atomic group.
+ // Match with 2 alternative expressions, atomically.
{
- int atomic_stackpos = stackpos;
+ int alternation_starting_pos = pos;
+ int alternation_starting_capturepos = base.Crawlpos();
- // Match with 2 alternative expressions, atomically.
- //{
- int alternation_starting_pos = pos;
- int alternation_starting_capturepos = base.Crawlpos();
-
- // Branch 0
+ // Branch 0
+ {
+ // 1st capture group.
{
- // 1st capture group.
+ capture_starting_pos = pos;
+
+ // Match a Unicode digit atomically at least once.
{
- capture_starting_pos = pos;
-
- // Match a Unicode digit atomically at least once.
+ int iteration = 0;
+ while ((uint)iteration < (uint)slice.Length && char.IsDigit(slice[iteration]))
{
- int iteration = 0;
- while ((uint)iteration < (uint)slice.Length && char.IsDigit(slice[iteration]))
- {
- iteration++;
- }
-
- if (iteration == 0)
- {
- goto AlternationBranch;
- }
-
- slice = slice.Slice(iteration);
- pos += iteration;
+ iteration++;
}
- base.Capture(1, capture_starting_pos, pos);
+ if (iteration == 0)
+ {
+ goto AlternationBranch;
+ }
+
+ slice = slice.Slice(iteration);
+ pos += iteration;
}
- goto AlternationMatch;
-
- AlternationBranch:
- pos = alternation_starting_pos;
- slice = inputSpan.Slice(pos);
- UncaptureUntil(alternation_starting_capturepos);
+ base.Capture(1, capture_starting_pos, pos);
}
- // Branch 1
- {
- // 2nd capture group.
- //{
- capture_starting_pos1 = pos;
-
- // Match if at a word boundary.
- if (!Utilities.IsBoundary(inputSpan, pos))
- {
- UncaptureUntil(0);
- return false; // The input didn't match.
- }
-
- // 3rd capture group.
- //{
- capture_starting_pos2 = pos;
-
- // Match a character in the set [CDILMVXcdilmvx] greedily at least once.
- //{
- charloop_starting_pos = pos;
-
- int iteration1 = slice.IndexOfAnyExcept(Utilities.s_ascii_1832400118324001);
- if (iteration1 < 0)
- {
- iteration1 = slice.Length;
- }
-
- if (iteration1 == 0)
- {
- UncaptureUntil(0);
- return false; // The input didn't match.
- }
-
- slice = slice.Slice(iteration1);
- pos += iteration1;
-
- charloop_ending_pos = pos;
- charloop_starting_pos++;
- goto CharLoopEnd;
-
- CharLoopBacktrack:
- UncaptureUntil(charloop_capture_pos);
-
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- if (charloop_starting_pos >= charloop_ending_pos)
- {
- UncaptureUntil(0);
- return false; // The input didn't match.
- }
- pos = --charloop_ending_pos;
- slice = inputSpan.Slice(pos);
-
- CharLoopEnd:
- charloop_capture_pos = base.Crawlpos();
- //}
-
- base.Capture(3, capture_starting_pos2, pos);
-
- goto CaptureSkipBacktrack;
-
- CaptureBacktrack:
- goto CharLoopBacktrack;
-
- CaptureSkipBacktrack:;
- //}
-
- // Match if at a word boundary.
- if (!Utilities.IsBoundary(inputSpan, pos))
- {
- goto CaptureBacktrack;
- }
-
- base.Capture(2, capture_starting_pos1, pos);
- //}
-
- }
+ goto AlternationMatch;
- AlternationMatch:;
- //}
+ AlternationBranch:
+ pos = alternation_starting_pos;
+ slice = inputSpan.Slice(pos);
+ UncaptureUntil(alternation_starting_capturepos);
+ }
- stackpos = atomic_stackpos;
+ // Branch 1
+ {
+ // 2nd capture group.
+ {
+ capture_starting_pos1 = pos;
+
+ // Match if at a word boundary.
+ if (!Utilities.IsBoundary(inputSpan, pos))
+ {
+ UncaptureUntil(0);
+ return false; // The input didn't match.
+ }
+
+ // 3rd capture group.
+ {
+ capture_starting_pos2 = pos;
+
+ // Match a character in the set [CDILMVXcdilmvx] atomically at least once.
+ {
+ int iteration1 = slice.IndexOfAnyExcept(Utilities.s_ascii_1832400118324001);
+ if (iteration1 < 0)
+ {
+ iteration1 = slice.Length;
+ }
+
+ if (iteration1 == 0)
+ {
+ UncaptureUntil(0);
+ return false; // The input didn't match.
+ }
+
+ slice = slice.Slice(iteration1);
+ pos += iteration1;
+ }
+
+ base.Capture(3, capture_starting_pos2, pos);
+ }
+
+ // Match if at a word boundary.
+ if (!Utilities.IsBoundary(inputSpan, pos))
+ {
+ UncaptureUntil(0);
+ return false; // The input didn't match.
+ }
+
+ base.Capture(2, capture_starting_pos1, pos);
+ }
+
+ }
+
+ AlternationMatch:;
}
// The input matched.
"(\\b(?:(?:2(?:[0-4][0-9]|5[0-5])|[0-1]?[0-9] ..." (85 uses)
[GeneratedRegex("(\\b(?:(?:2(?:[0-4][0-9]|5[0-5])|[0-1]?[0-9]?[0-9])\\.){3}(?:(?:2([0-4][0-9]|5[0-5])|[0-1]?[0-9]?[0-9]))\\b)")]
/// ○ Match a character in the set [0-5].<br/>
/// ○ Match a sequence of expressions.<br/>
/// ○ Match a character in the set [01] greedily, optionally.<br/>
- /// ○ Match a character in the set [0-9] greedily at least 1 and at most 2 times.<br/>
+ /// ○ Match a character in the set [0-9] atomically at least 1 and at most 2 times.<br/>
/// ○ Match if at a word boundary.<br/>
/// </code>
/// </remarks>
int capture_starting_pos = 0;
int capture_starting_pos1 = 0;
int charloop_capture_pos = 0;
- int charloop_capture_pos1 = 0;
int charloop_starting_pos = 0, charloop_ending_pos = 0;
int charloop_starting_pos1 = 0, charloop_ending_pos1 = 0;
- int charloop_starting_pos2 = 0, charloop_ending_pos2 = 0;
int loop_iteration = 0;
int stackpos = 0;
int startingStackpos = 0;
charloop_capture_pos = base.Crawlpos();
//}
- // Match a character in the set [0-9] greedily at least 1 and at most 2 times.
- //{
- charloop_starting_pos2 = pos;
-
+ // Match a character in the set [0-9] atomically at least 1 and at most 2 times.
+ {
int iteration1 = 0;
while (iteration1 < 2 && (uint)iteration1 < (uint)slice.Length && char.IsAsciiDigit(slice[iteration1]))
{
slice = slice.Slice(iteration1);
pos += iteration1;
-
- charloop_ending_pos2 = pos;
- charloop_starting_pos2++;
- goto CharLoopEnd2;
-
- CharLoopBacktrack2:
- UncaptureUntil(charloop_capture_pos1);
-
- if (Utilities.s_hasTimeout)
- {
- base.CheckTimeout();
- }
-
- if (charloop_starting_pos2 >= charloop_ending_pos2)
- {
- goto CharLoopBacktrack1;
- }
- pos = --charloop_ending_pos2;
- slice = inputSpan.Slice(pos);
-
- CharLoopEnd2:
- charloop_capture_pos1 = base.Crawlpos();
- //}
+ }
alternation_branch = 1;
goto AlternationMatch1;
case 0:
goto AlternationBranch1;
case 1:
- goto CharLoopBacktrack2;
+ goto CharLoopBacktrack1;
}
AlternationMatch1:;
For more diff examples, see https://gist.github.com/MihuBot/f53211ef2653356a070d40dbd2e82145
Total bytes of base: 54709209
Total bytes of diff: 54706120
Total bytes of delta: -3089 (-0.01 % of base)
Total relative delta: -1.74
diff is an improvement.
relative diff is an improvement.
For a list of JIT diff improvements, see Improvements.md
Sample source code for further analysis
// Local file the results JSON is extracted to; the archive is only downloaded when this file is missing.
const string JsonPath = "RegexResults-1254.json";
if (!File.Exists(JsonPath))
{
    // Bind the client to a using declaration so it is disposed once the archive
    // has been extracted, instead of leaving an undisposed HttpClient behind.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/E2Lh8F_A");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // Only the "Results.json" entry of the archive is needed; First() throws if it is absent.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);

// IncludeFields = true makes the serializer populate public fields in addition to properties.
// The trailing '!' suppresses the nullable warning on the deserialized result.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })!;
Console.WriteLine($"Working with {entries.Length} patterns");
/// <summary>
/// A regex pattern together with the <see cref="RegexOptions"/> it is used with and a count.
/// </summary>
// NOTE(review): Count is presumably the number of uses of the pattern (cf. "(293 uses)" in the report) — confirm.
record KnownPattern(string Pattern, RegexOptions Options, int Count);
/// <summary>
/// Element type of the array deserialized from the results JSON file.
/// </summary>
sealed class RegexEntry
{
    // The pattern (with its options and count) this entry belongs to.
    public required KnownPattern Regex { get; set; }
    // NOTE(review): presumably the generated source produced by the main branch — confirm.
    public required string MainSource { get; set; }
    // NOTE(review): presumably the generated source produced by the PR build — confirm.
    public required string PrSource { get; set; }
    // Optional diff text; may be null.
    public string? FullDiff { get; set; }
    // Optional diff text; may be null. NOTE(review): presumably an abbreviated form of FullDiff — confirm.
    public string? ShortDiff { get; set; }
    // Optional (Name, Values) pairs; may be null.
    // NOTE(review): presumably the SearchValues<char> fields emitted by the generator — confirm.
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }
    // Optional (Values, ComparisonType) pairs; may be null.
    // NOTE(review): presumably the SearchValues<string> fields emitted by the generator — confirm.
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}
Artifacts:
- ShortExampleDiffs.md (27 KB)
- LongExampleDiffs.md (70 KB)
- Results.zip (42 MB)
- JitAnalyzeSummary.txt (7 KB)
- JitDiffImprovements.md (877 KB)
- LongJitDiffImprovements.md (1 MB)
- jit-diffs.zip (394 MB)
Metadata
Metadata
Assignees
Labels
No labels