diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 53c2278bebbc09..333d80d2536e8e 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -300,7 +300,7 @@ private static void AddIsWordCharHelper(Dictionary requiredHel "internal static bool IsWordChar(char ch)", "{", " // Mask of Unicode categories that combine to form [\\w]", - " const int WordCategories =", + " const int WordCategoriesMask =", " 1 << (int)UnicodeCategory.UppercaseLetter |", " 1 << (int)UnicodeCategory.LowercaseLetter |", " 1 << (int)UnicodeCategory.TitlecaseLetter |", @@ -321,7 +321,7 @@ private static void AddIsWordCharHelper(Dictionary requiredHel " int chDiv8 = ch >> 3;", " return (uint)chDiv8 < (uint)ascii.Length ?", " (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :", - " (WordCategories & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;", + " (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0;", "}", }); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index cc3611aaefde55..ab0dd5554313a5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -1142,14 +1142,8 @@ public static bool IsECMAWordChar(char ch) => 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07 }; - /// Determines whether a character is considered a word character for the purposes of testing the \w set. - public static bool IsWordChar(char ch) - { - // This is the same as IsBoundaryWordChar, except that IsBoundaryWordChar also - // returns true for \u200c and \u200d. - - // Mask of Unicode categories that combine to form [\\w]" - const int WordCategories = + /// Mask of Unicode categories that combine to form [\\w] + private const int WordCategoriesMask = 1 << (int)UnicodeCategory.UppercaseLetter | 1 << (int)UnicodeCategory.LowercaseLetter | 1 << (int)UnicodeCategory.TitlecaseLetter | @@ -1159,14 +1153,20 @@ public static bool IsWordChar(char ch) 1 << (int)UnicodeCategory.DecimalDigitNumber | 1 << (int)UnicodeCategory.ConnectorPunctuation; - // Bitmap for whether each character 0 through 127 is in [\\w]", - ReadOnlySpan ascii = WordCharAsciiLookup; + /// Determines whether a character is considered a word character for the purposes of testing the \w set. + public static bool IsWordChar(char ch) + { + // This is the same as IsBoundaryWordChar, except that IsBoundaryWordChar also + // returns true for \u200c and \u200d. + + // Bitmap for whether each character 0 through 127 is in [\\w] + ReadOnlySpan ascii = WordCharAsciiLookup; - // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.", - int chDiv8 = ch >> 3; - return (uint)chDiv8 < (uint)ascii.Length ? - (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 : - (WordCategories & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0; + // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category. + int chDiv8 = ch >> 3; + return (uint)chDiv8 < (uint)ascii.Length ? + (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 : + (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0; } /// Determines whether a character is considered a word character for the purposes of testing a word character boundary. @@ -1178,25 +1178,14 @@ public static bool IsBoundaryWordChar(char ch) // ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER. const char ZeroWidthNonJoiner = '\u200C', ZeroWidthJoiner = '\u200D'; - // Mask of Unicode categories that combine to form [\\w]" - const int WordCategories = - 1 << (int)UnicodeCategory.UppercaseLetter | - 1 << (int)UnicodeCategory.LowercaseLetter | - 1 << (int)UnicodeCategory.TitlecaseLetter | - 1 << (int)UnicodeCategory.ModifierLetter | - 1 << (int)UnicodeCategory.OtherLetter | - 1 << (int)UnicodeCategory.NonSpacingMark | - 1 << (int)UnicodeCategory.DecimalDigitNumber | - 1 << (int)UnicodeCategory.ConnectorPunctuation; - - // Bitmap for whether each character 0 through 127 is in [\\w]", + // Bitmap for whether each character 0 through 127 is in [\\w] ReadOnlySpan ascii = WordCharAsciiLookup; - // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.", + // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category. int chDiv8 = ch >> 3; return (uint)chDiv8 < (uint)ascii.Length ? (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 : - ((WordCategories & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0 || + ((WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0 || (ch == ZeroWidthJoiner | ch == ZeroWidthNonJoiner)); } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs index 34e5e7f9559a7e..6f05f0f0c6d0b6 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs @@ -355,7 +355,7 @@ file static class Utilities internal static bool IsWordChar(char ch) { // Mask of Unicode categories that combine to form [\w] - const int WordCategories = + const int WordCategoriesMask = 1 << (int)UnicodeCategory.UppercaseLetter | 1 << (int)UnicodeCategory.LowercaseLetter | 1 << (int)UnicodeCategory.TitlecaseLetter | @@ -364,19 +364,19 @@ internal static bool IsWordChar(char ch) 1 << (int)UnicodeCategory.NonSpacingMark | 1 << (int)UnicodeCategory.DecimalDigitNumber | 1 << (int)UnicodeCategory.ConnectorPunctuation; - + // Bitmap for whether each character 0 through 127 is in [\w] ReadOnlySpan ascii = new byte[] { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03, 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07 }; - + // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category. int chDiv8 = ch >> 3; return (uint)chDiv8 < (uint)ascii.Length ? (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 : - (WordCategories & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0; + (WordCategoriesMask & (1 << (int)CharUnicodeInfo.GetUnicodeCategory(ch))) != 0; } /// Pushes 2 values onto the backtracking stack.