Skip to content

[RegexDiff X64] [MihaZupan] Use Contains{Any} in Regex source generator #978

@MihuBot

Description

@MihuBot

Job completed in 15 minutes 44 seconds (remote runner delay: 1 minute 26 seconds).
dotnet/runtime#112065
Using arguments: regexdiff

118 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"^[a-f0-9]{32}$" (4920 uses)
[GeneratedRegex("^[a-f0-9]{32}$")]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigitsLower))
  {
      return false; // The input didn't match.
  }
"\"([a-fA-F0-9-\\{\\}]{36})\"" (569 uses)
[GeneratedRegex("\"([a-fA-F0-9-\\{\\}]{36})\"", RegexOptions.CultureInvariant)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 36).IndexOfAnyExcept(Utilities.s_ascii_20FF037E0000007E000028) >= 0)
+   if (slice.Slice(0, 36).ContainsAnyExcept(Utilities.s_ascii_20FF037E0000007E000028))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.
"^[a-z0-9]{24}$" (285 uses)
[GeneratedRegex("^[a-z0-9]{24}$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 24).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndKelvinSign) >= 0)
+   if (slice.Slice(0, 24).ContainsAnyExcept(Utilities.s_asciiLettersAndDigitsAndKelvinSign))
  {
      return false; // The input didn't match.
  }
"^[0-9a-f]{40}$" (202 uses)
[GeneratedRegex("^[0-9a-f]{40}$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      return false; // The input didn't match.
  }
"\\A(?:[A-Z0-9]{17})\\z" (182 uses)
[GeneratedRegex("\\A(?:[A-Z0-9]{17})\\z")]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 17).IndexOfAnyExcept(Utilities.s_asciiLettersUpperAndDigits) >= 0)
+   if (slice.Slice(0, 17).ContainsAnyExcept(Utilities.s_asciiLettersUpperAndDigits))
  {
      return false; // The input didn't match.
  }
"^\\\\((?<StoreLocation>CurrentUser|LocalMach ..." (167 uses)
[GeneratedRegex("^\\\\((?<StoreLocation>CurrentUser|LocalMachine)(\\\\(?<StoreName>[a-zA-Z]+)(\\\\(?<Thumbprint>[0-9a-f]{40}))?)?)?$")]
      goto LoopIterationNoMatch2;
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigitsLower))
  {
      goto LoopIterationNoMatch2;
  }
"IR[0-9]{24}" (144 uses)
[GeneratedRegex("IR[0-9]{24}", RegexOptions.IgnoreCase)]
  // Match a character in the set [0-9] exactly 24 times.
  {
-       if (slice.Slice(2, 24).IndexOfAnyExceptInRange('0', '9') >= 0)
+       if (slice.Slice(2, 24).ContainsAnyExceptInRange('0', '9'))
      {
          return false; // The input didn't match.
      }
"^committed\\s+changeset\\s+\\d+:(?<hash>[0-9 ..." (132 uses)
[GeneratedRegex("^committed\\s+changeset\\s+\\d+:(?<hash>[0-9a-f]{40})$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.
"^[A-Fa-f0-9]{32}$|^({|\\()?[A-Fa-f0-9]{8}-([ ..." (130 uses)
[GeneratedRegex("^[A-Fa-f0-9]{32}$|^({|\\()?[A-Fa-f0-9]{8}-([A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}(}|\\))?$|^({)?[0xA-Fa-f0-9]{3,10}(, {0,1}[0xA-Fa-f0-9]{3,6}){2}, {0,1}({)([0xA-Fa-f0-9]{3,4}, {0,1}){7}[0xA-Fa-f0-9]{3,4}(}})$")]
      goto AlternationBranch;
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      goto AlternationBranch;
  }
"asmz://(?<guid>[0-9a-fA-F]{32})/(?<size>[0-9 ..." (99 uses)
[GeneratedRegex("asmz://(?<guid>[0-9a-fA-F]{32})/(?<size>[0-9]+)(/(?<flags>[a-zA-Z0-9]*))?", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.

For more diff examples, see https://gist.github.com/MihuBot/47c9e3e7aadcb5bd85d2a8b35243f90d

Total bytes of base: 53924200
Total bytes of diff: 53924088
Total bytes of delta: -112 (-0.00 % of base)
Total relative delta: -0.02
    diff is an improvement.
    relative diff is an improvement.

For a list of JIT diff improvements, see Improvements.md

Sample source code for further analysis
// Downloads the results archive on the first run and caches the extracted JSON at JsonPath;
// later runs skip the download and read the cached file directly.
const string JsonPath = "RegexResults-978.json";
if (!File.Exists(JsonPath))
{
    // Scope the client to the download so its handler/connections are released deterministically.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/EogtjSRA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // Fail with a descriptive message instead of First()'s generic "Sequence contains no matching element".
    ZipArchiveEntry resultsEntry = archive.Entries.FirstOrDefault(e => e.Name == "Results.json")
        ?? throw new InvalidDataException("The downloaded archive does not contain a 'Results.json' entry.");
    resultsEntry.ExtractToFile(JsonPath);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);

// IncludeFields: value-tuple elements are fields, so the tuple-typed members of RegexEntry
// are only bound when field (de)serialization is enabled.
// Deserialize returns null for a JSON 'null' document; surface that here rather than as a
// NullReferenceException on first use.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })
    ?? throw new InvalidDataException($"'{JsonPath}' deserialized to null; expected a JSON array of entries.");
Console.WriteLine($"Working with {entries.Length} patterns");



/// <summary>
/// A regular-expression pattern together with its options and an associated count.
/// </summary>
/// <param name="Pattern">The pattern text.</param>
/// <param name="Options">The <see cref="RegexOptions"/> paired with the pattern.</param>
/// <param name="Count">
/// NOTE(review): presumably how many times this pattern/options pair was seen
/// (cf. the "N uses" figures in the report) — confirm against the data producer.
/// </param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>
/// One element of the array deserialized from the results JSON file.
/// </summary>
/// <remarks>
/// The members marked <c>required</c> must be supplied when an instance is created;
/// the nullable members may be absent.
/// </remarks>
sealed class RegexEntry
{
    /// <summary>The pattern, options and count this entry describes.</summary>
    public required KnownPattern Regex { get; set; }

    /// <summary>NOTE(review): presumably the source generated for the pattern on the base (main) branch — confirm.</summary>
    public required string MainSource { get; set; }

    /// <summary>NOTE(review): presumably the source generated for the pattern with the PR applied — confirm.</summary>
    public required string PrSource { get; set; }

    /// <summary>NOTE(review): presumably the complete diff between the two sources; may be null — confirm when.</summary>
    public string? FullDiff { get; set; }

    /// <summary>NOTE(review): presumably a condensed form of the diff; may be null — confirm when.</summary>
    public string? ShortDiff { get; set; }

    /// <summary>
    /// Optional array of (Name, Values) pairs.
    /// NOTE(review): looks like the char search-value sets referenced by the generated code — confirm.
    /// </summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }

    /// <summary>
    /// Optional array of (Values, ComparisonType) pairs.
    /// NOTE(review): looks like the string search-value sets referenced by the generated code — confirm.
    /// </summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions