forked from MihaZupan/runtime-utils
-
Couldn't load subscription status.
- Fork 0
Open
Description
Job completed in 18 minutes 16 seconds (remote runner delay: 1 minute 14 seconds).
dotnet/runtime#118105
Using arguments: regexdiff
3865 out of 18857 patterns have generated source code changes.
Examples of GeneratedRegex source diffs
"{\\s*(?<P>\\D\\w*)\\s*\\:\\s*var\\(\\s*(?<B> ..." (9881 uses)
[GeneratedRegex("{\\s*(?<P>\\D\\w*)\\s*\\:\\s*var\\(\\s*(?<B>\\D\\w*)\\s*\\)\\s*(;\\s*(?<P>\\D\\w*)\\s*\\:\\s*var\\(\\s*(?<B>\\D\\w*)\\s*\\)\\s*\\s*)*}")]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
StackPush(ref stack, ref pos, arg0, arg1, arg2);
}
}
+
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
}
}
"[A-z-[dDfFiIoOqQuUwWzZ]]\\d[A-z-[dDfFiIoOqQu ..." (5703 uses)
[GeneratedRegex("[A-z-[dDfFiIoOqQuUwWzZ]]\\d[A-z-[dDfFiIoOqQuU]] *\\d[A-z-[dDfFiIoOqQuU]]\\d\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant)]
}
// Match if at a word boundary.
- if (!Utilities.IsBoundary(inputSpan, pos + 6))
+ if (!Utilities.IsPostWordCharBoundary(inputSpan, pos + 6))
{
return false; // The input didn't match.
}
/// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
- /// <summary>Determines whether the specified index is a boundary.</summary>
+ /// <summary>Determines whether the specified index is a boundary word character.</summary>
+ /// <remarks>This is the same as \w plus U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER.</remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)
+ internal static bool IsBoundaryWordChar(char ch)
{
- int indexMinus1 = index - 1;
- return ((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=
- ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));
-
- static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\u200C' | ch == '\u200D');
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
+ int chDiv8 = ch >> 3;
+ return (uint)chDiv8 < (uint)ascii.Length ?
+ (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
+ ((WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0) || (ch is '\u200C' or '\u200D');
}
- /// <summary>Determines whether the character is part of the [\w] set.</summary>
+ /// <summary>Determines whether the specified index is a boundary.</summary>
+ /// <remarks>This variant is only employed when the previous character has already been validated as a word character.</remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- internal static bool IsWordChar(char ch)
- {
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
+ internal static bool IsPostWordCharBoundary(ReadOnlySpan<char> inputSpan, int index) =>
+ ((uint)index >= (uint)inputSpan.Length || !IsBoundaryWordChar(inputSpan[index]));
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
};
- // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
- int chDiv8 = ch >> 3;
- return (uint)chDiv8 < (uint)ascii.Length ?
- (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
- (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;
- }
/// <summary>Supports searching for characters in or not in "ABCEGHJKLMNPRSTVXY[\\]^_`abceghjklmnprstvxyK".</summary>
internal static readonly SearchValues<char> s_nonAscii_0DD9414ACADF36B5FCB9FD5EDD16B6170F356585861BFF97C0F99F5B6EB09472 = SearchValues.Create("ABCEGHJKLMNPRSTVXY[\\]^_`abceghjklmnprstvxyK");"^\\w+([_.-]\\w+)*$" (5006 uses)
[GeneratedRegex("^\\w+([_.-]\\w+)*$", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
StackPush(ref stack, ref pos, arg0, arg1);
}
}
+
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
}
}
"^(\\w*)=(.*?)" (3778 uses)
[GeneratedRegex("^(\\w*)=(.*?)")]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
(WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;
}
+
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
}
}
"^(\\w+\\.)+\\w+$" (2468 uses)
[GeneratedRegex("^(\\w+\\.)+\\w+$")]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
StackPush(ref stack, ref pos, arg0, arg1);
}
}
+
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
}
}
"{(?<env>env:)??\\w+(\\s+(\\?\\?)??\\s+\\w+)??}" (2282 uses)
[GeneratedRegex("{(?<env>env:)??\\w+(\\s+(\\?\\?)??\\s+\\w+)??}")]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
StackPush(ref stack, ref pos, arg0, arg1, arg2);
}
}
+
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
}
}
", Version=\\d+.\\d+.\\d+.\\d+, Culture=\\w+, ..." (2239 uses)
[GeneratedRegex(", Version=\\d+.\\d+.\\d+.\\d+, Culture=\\w+, PublicKeyToken=\\w+")]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
(WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;
}
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
+
/// <summary>Supports searching for the string ", Version=".</summary>
internal static readonly SearchValues<string> s_indexOfString_F484FBA9DDF61CC32D17E4ED223128BF4D7C62347668A9B369CE2C1E6BBB3513 = SearchValues.Create([", Version="], StringComparison.Ordinal);
}
"^-+ *BEGIN (?<keyName>\\w+( \\w+)*) PRIVATE ..." (1964 uses)
[GeneratedRegex("^-+ *BEGIN (?<keyName>\\w+( \\w+)*) PRIVATE KEY *-+\\r?\\n(Proc-Type: 4,ENCRYPTED\\r?\\nDEK-Info: (?<cipherName>[A-Z0-9-]+),(?<salt>[A-F0-9]+)\\r?\\n\\r?\\n)?(?<data>([a-zA-Z0-9/+=]{1,80}\\r?\\n)+)-+ *END \\k<keyName> PRIVATE KEY *-+", RegexOptions.Multiline)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
}
}
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
+
/// <summary>Supports searching for characters in or not in "0123456789ABCDEF".</summary>
internal static readonly SearchValues<char> s_asciiHexDigitsUpper = SearchValues.Create("0123456789ABCDEF");
"&(?!#?\\w+;)" (1880 uses)
[GeneratedRegex("&(?!#?\\w+;)")]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
(WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;
}
+
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
}
}
"\\A\\s*(?<name>\\w+)(\\s*\\((?<arguments>.*) ..." (1751 uses)
[GeneratedRegex("\\A\\s*(?<name>\\w+)(\\s*\\((?<arguments>.*)\\))?\\s*\\Z", RegexOptions.Singleline)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsWordChar(char ch)
{
- // Mask of Unicode categories that combine to form [\w]
- const int WordCategoriesMask =
- 1 << (int)UnicodeCategory.UppercaseLetter |
- 1 << (int)UnicodeCategory.LowercaseLetter |
- 1 << (int)UnicodeCategory.TitlecaseLetter |
- 1 << (int)UnicodeCategory.ModifierLetter |
- 1 << (int)UnicodeCategory.OtherLetter |
- 1 << (int)UnicodeCategory.NonSpacingMark |
- 1 << (int)UnicodeCategory.DecimalDigitNumber |
- 1 << (int)UnicodeCategory.ConnectorPunctuation;
-
- // Bitmap for whether each character 0 through 127 is in [\w]
- ReadOnlySpan<byte> ascii = new byte[]
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
- 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
- };
-
// If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.
+ ReadOnlySpan<byte> ascii = WordCharBitmap;
int chDiv8 = ch >> 3;
return (uint)chDiv8 < (uint)ascii.Length ?
(ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :
StackPush(ref stack, ref pos, arg0, arg1, arg2);
}
}
+
+ /// <summary>Provides a mask of Unicode categories that combine to form [\w].</summary>
+ private const int WordCategoriesMask =
+ 1 << (int)UnicodeCategory.UppercaseLetter |
+ 1 << (int)UnicodeCategory.LowercaseLetter |
+ 1 << (int)UnicodeCategory.TitlecaseLetter |
+ 1 << (int)UnicodeCategory.ModifierLetter |
+ 1 << (int)UnicodeCategory.OtherLetter |
+ 1 << (int)UnicodeCategory.NonSpacingMark |
+ 1 << (int)UnicodeCategory.DecimalDigitNumber |
+ 1 << (int)UnicodeCategory.ConnectorPunctuation;
+
+ /// <summary>Gets a bitmap for whether each character 0 through 127 is in [\w]</summary>
+ private static ReadOnlySpan<byte> WordCharBitmap => new byte[]
+ {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
+ 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
+ };
+
}
}
For more diff examples, see https://gist.github.com/MihuBot/3dc0c347ab5ededb4c479718266d90f0
JIT assembly changes
Total bytes of base: 54138884
Total bytes of diff: 53799262
Total bytes of delta: -339622 (-0.63 % of base)
Total relative delta: -96.31
diff is an improvement.
relative diff is an improvement.
For a list of JIT diff regressions, see Regressions.md
For a list of JIT diff improvements, see Improvements.md
Sample source code for further analysis
// Loads the per-pattern results of this regex diff job for local analysis.
// The results archive is downloaded and extracted only when no local copy exists yet.
const string JsonPath = "RegexResults-1303.json";

if (!File.Exists(JsonPath))
{
    // Dispose the client together with the response stream once the entry has been extracted.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/E2rQ5ESA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First throws if the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);

// IncludeFields is enabled; presumably required because the tuple elements used by RegexEntry are fields.
// A literal 'null' JSON payload is reported explicitly instead of failing later with a NullReferenceException.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })
    ?? throw new InvalidDataException($"'{JsonPath}' deserialized to null; expected an array of regex entries.");

Console.WriteLine($"Working with {entries.Length} patterns");

/// <summary>A regex pattern together with its options and a count (presumably the number of known uses).</summary>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>One entry of the deserialized results file.</summary>
sealed class RegexEntry
{
    /// <summary>The pattern this entry describes.</summary>
    public required KnownPattern Regex { get; set; }

    /// <summary>Source text for the baseline (presumably generated from the main branch; confirm against the producer).</summary>
    public required string MainSource { get; set; }

    /// <summary>Source text for the change under test (presumably generated from the PR build; confirm against the producer).</summary>
    public required string PrSource { get; set; }

    /// <summary>Optional full diff text; may be absent.</summary>
    public string? FullDiff { get; set; }

    /// <summary>Optional shortened diff text; may be absent.</summary>
    public string? ShortDiff { get; set; }

    /// <summary>Optional (name, values) pairs; the name suggests these describe SearchValues&lt;char&gt; instances.</summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }

    /// <summary>Optional (values, comparison) pairs; the name suggests these describe SearchValues&lt;string&gt; instances.</summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}
Artifacts:
- ShortExampleDiffs.md (27 KB)
- LongExampleDiffs.md (900 KB)
- Results.zip (55 MB)
- jit-diffs.zip (390 MB)
- JitAnalyzeSummary.txt (70 KB)
- JitDiffRegressions.md (865 KB)
- LongJitDiffRegressions.md (85 MB)
- JitDiffImprovements.md (873 KB)
- LongJitDiffImprovements.md (23 MB)
Metadata
Metadata
Assignees
Labels
No labels