From e67692acb3fdd068b4e577dc9ad9fa350f2e4ca8 Mon Sep 17 00:00:00 2001 From: graphemecluster Date: Sat, 27 Apr 2024 15:12:43 +0800 Subject: [PATCH] Correct flags scanning for non-BMP characters --- src/compiler/scanner.ts | 32 ++++++++++--------- ...egularExpressionWithNonBMPFlags.errors.txt | 23 +++++++++++++ .../regularExpressionWithNonBMPFlags.js | 8 +++++ .../regularExpressionWithNonBMPFlags.symbols | 6 ++++ .../regularExpressionWithNonBMPFlags.types | 9 ++++++ .../regularExpressionWithNonBMPFlags.ts | 3 ++ 6 files changed, 66 insertions(+), 15 deletions(-) create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.js create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols create mode 100644 tests/baselines/reference/regularExpressionWithNonBMPFlags.types create mode 100644 tests/cases/compiler/regularExpressionWithNonBMPFlags.ts diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 5ae766dab8699..d85c7c9ab0d78 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2485,27 +2485,28 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; let regExpFlags = RegularExpressionFlags.None; while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = codePointAt(text, pos); if (!isIdentifierPart(ch, languageVersion)) { break; } + const size = charSize(ch); if (reportErrors) { - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch)); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); + error(Diagnostics.Unknown_regular_expression_flag, pos, size); } else if (regExpFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + error(Diagnostics.Duplicate_regular_expression_flag, pos, size); } else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) { - error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, 1); + error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, size); } else { regExpFlags |= flag; - checkRegularExpressionFlagAvailable(flag); + checkRegularExpressionFlagAvailability(flag); } } - pos++; + pos += size; } if (reportErrors) { scanRange(startOfRegExpBody, endOfRegExpBody - startOfRegExpBody, () => { @@ -2752,25 +2753,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = codePointAt(text, pos); if (!isIdentifierPart(ch, languageVersion)) { break; } - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + const size = charSize(ch); + const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch)); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); + error(Diagnostics.Unknown_regular_expression_flag, pos, size); } else if (currFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + error(Diagnostics.Duplicate_regular_expression_flag, pos, size); } else if (!(flag & RegularExpressionFlags.Modifiers)) { - error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); + error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, size); } else { currFlags |= flag; - checkRegularExpressionFlagAvailable(flag); + checkRegularExpressionFlagAvailability(flag); } - pos++; + pos += size; } return currFlags; } @@ -3470,7 +3472,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean }); } - function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags) { + function checkRegularExpressionFlagAvailability(flag: RegularExpressionFlags) { const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; if (availableFrom && languageVersion < availableFrom) { error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt new file mode 100644 index 0000000000000..b91d0d9c12fd1 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt @@ -0,0 +1,23 @@ +regularExpressionWithNonBMPFlags.ts(1,23): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,25): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,28): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,41): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,43): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(1,45): error TS1499: Unknown regular expression flag. + + +==== regularExpressionWithNonBMPFlags.ts (6 errors) ==== + const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + \ No newline at end of file diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.js b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js new file mode 100644 index 0000000000000..847b74684b459 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js @@ -0,0 +1,8 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +//// [regularExpressionWithNonBMPFlags.ts] +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; + + +//// [regularExpressionWithNonBMPFlags.js] +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols new file mode 100644 index 0000000000000..29c7a53335550 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols @@ -0,0 +1,6 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +=== regularExpressionWithNonBMPFlags.ts === +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; +>𝘳𝘦𝘨𝘦𝘹 : Symbol(𝘳𝘦𝘨𝘦𝘹, Decl(regularExpressionWithNonBMPFlags.ts, 0, 5)) + diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.types b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types new file mode 100644 index 0000000000000..5f385d608df02 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types @@ -0,0 +1,9 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +=== regularExpressionWithNonBMPFlags.ts === +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; +>𝘳𝘦𝘨𝘦𝘹 : RegExp +> : ^^^^^^ +>/(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶 : RegExp +> : ^^^^^^ + diff --git a/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts new file mode 100644 index 0000000000000..65eaca6d9cff8 --- /dev/null +++ b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts @@ -0,0 +1,3 @@ +// @target: esnext + +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;