Skip to content

Commit c435526

Browse files
Fix Incorrect Disjunction Alternative Visibility
1 parent c44a057 commit c435526

20 files changed

+1221
-1246
lines changed

src/compiler/scanner.ts

Lines changed: 59 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import {
2+
addRange,
23
append,
34
arrayIsEqualTo,
45
binarySearch,
@@ -12,7 +13,6 @@ import {
1213
Debug,
1314
DiagnosticMessage,
1415
Diagnostics,
15-
flatMap,
1616
forEach,
1717
getNameOfScriptTarget,
1818
getSpellingSuggestion,
@@ -32,7 +32,8 @@ import {
3232
positionIsSynthesized,
3333
PunctuationOrKeywordSyntaxKind,
3434
RegularExpressionAnyString,
35-
RegularExpressionDisjunctionScope,
35+
RegularExpressionDisjunction,
36+
RegularExpressionDisjunctionsScope,
3637
RegularExpressionFlags,
3738
RegularExpressionPattern,
3839
RegularExpressionPatternContent,
@@ -2664,12 +2665,9 @@ export function createScanner(
26642665
var groupNameReferences: (TextRange & { name: string; })[] | undefined;
26652666
/** All numeric backreferences within the regex. */
26662667
var decimalEscapes: (TextRange & { value: number; })[] | undefined;
2667-
/** A stack of scopes for disjunction, including capturing groups, non-capturing groups, lookaheads and lookbehinds. */
2668-
var disjunctionsScopeStack: (RegularExpressionDisjunctionScope | undefined)[] = [];
2669-
var topDisjunctionsScope: RegularExpressionDisjunctionScope | undefined;
2670-
/** A stack of scopes for named capturing groups. @see {scanGroupName} */
2671-
var namedCapturingGroupsScopeStack: (Set<string> | undefined)[] = [];
2672-
var topNamedCapturingGroupsScope: Set<string> | undefined;
2668+
/** A stack of scopes for disjunctions, including capturing groups, non-capturing groups, lookaheads and lookbehinds. */
2669+
var disjunctionsScopesStack: RegularExpressionDisjunctionsScope[] = [];
2670+
var topDisjunctionsScope!: RegularExpressionDisjunctionsScope;
26732671
/* eslint-enable no-var */
26742672

26752673
regExpCapturingGroups = [];
@@ -2691,21 +2689,17 @@ export function createScanner(
26912689
// Disjunction ::= Alternative ('|' Alternative)*
26922690
function scanDisjunction(isInGroup: boolean): RegularExpressionPatternUnion {
26932691
const patternUnion = new Set() as RegularExpressionPatternUnion;
2694-
disjunctionsScopeStack.push(topDisjunctionsScope);
2695-
topDisjunctionsScope = undefined;
2696-
namedCapturingGroupsScopeStack.push(topNamedCapturingGroupsScope);
2697-
topNamedCapturingGroupsScope = undefined;
2692+
(topDisjunctionsScope = [] as unknown as RegularExpressionDisjunctionsScope).currentAlternativeIndex = 0;
26982693
while (true) {
26992694
patternUnion.add(scanAlternative(isInGroup));
27002695
if (charCodeChecked(pos) !== CharacterCodes.bar) {
27012696
if (patternUnion.size > 1) {
27022697
markAllInnerPatternUnionsAsPossiblyUndefined(patternUnion);
27032698
}
2704-
topDisjunctionsScope = disjunctionsScopeStack.pop();
2705-
topNamedCapturingGroupsScope = namedCapturingGroupsScopeStack.pop();
27062699
return patternUnion;
27072700
}
27082701
pos++;
2702+
topDisjunctionsScope.currentAlternativeIndex = topDisjunctionsScope.length;
27092703
}
27102704
}
27112705

@@ -2827,25 +2821,34 @@ export function createScanner(
28272821
groupNumber = ++numberOfCapturingGroups;
28282822
isPreviousTermQuantifiable = true;
28292823
}
2830-
const patternUnion = scanDisjunction(/*isInGroup*/ true);
2824+
2825+
const disjunction: RegularExpressionDisjunction = { groupNumber, groupName };
2826+
topDisjunctionsScope.push(disjunction);
2827+
disjunctionsScopesStack.push(topDisjunctionsScope);
2828+
const patternUnion = disjunction.patternUnion = scanDisjunction(/*isInGroup*/ true);
2829+
if (isNegativeAssertion) {
2830+
// Mark all capturing groups in the negative lookahead/lookbehind just closed as possibly undefined
2831+
markAllInnerPatternUnionsAsPossiblyUndefined(patternUnion);
2832+
// Also flag them such that they won't be matched by backreferences
2833+
for (const disjunction of topDisjunctionsScope) {
2834+
disjunction.isInNegativeAssertion = true;
2835+
}
2836+
}
2837+
const currentTopDisjunctionsScope = topDisjunctionsScope;
2838+
topDisjunctionsScope = disjunctionsScopesStack.pop()!;
2839+
addRange(topDisjunctionsScope, currentTopDisjunctionsScope);
2840+
28312841
isCaseInsensitive = prevIsCaseInsensitive;
28322842
if (isPreviousTermQuantifiable) {
28332843
// not an assertion
28342844
pattern.push(patternUnion);
28352845
if (groupNumber) {
28362846
regExpCapturingGroups[groupNumber] = patternUnion;
2837-
((topDisjunctionsScope ??= {}).groups ??= [])[groupNumber] = patternUnion;
28382847
if (groupName) {
28392848
(regExpCapturingGroupSpecifiers ??= createMultiMap()).add(groupName, patternUnion);
2840-
(topDisjunctionsScope.groupSpecifiers ??= createMultiMap()).add(groupName, patternUnion);
28412849
}
28422850
}
28432851
}
2844-
else if (isNegativeAssertion) {
2845-
// Invalidate all capturing groups in the negative lookahead/lookbehind just closed
2846-
// such that they won't be matched by backreferences
2847-
topDisjunctionsScope = undefined;
2848-
}
28492852
scanExpectedChar(CharacterCodes.closeParen);
28502853
break;
28512854
case CharacterCodes.openBrace:
@@ -3029,16 +3032,24 @@ export function createScanner(
30293032
return currFlags;
30303033
}
30313034

3032-
function getBackreferencePatternUnion(selector: (disjunctionScope: RegularExpressionDisjunctionScope | undefined) => RegularExpressionPatternUnion | RegularExpressionPatternUnion[] | undefined): RegularExpressionPatternContent {
3033-
disjunctionsScopeStack.push(topDisjunctionsScope);
3034-
const capturingGroups = flatMap(disjunctionsScopeStack, selector);
3035-
disjunctionsScopeStack.pop();
3036-
if (!capturingGroups.length) return "";
3037-
const patternUnion = new Set(capturingGroups as RegularExpressionPattern) as RegularExpressionPatternUnion;
3038-
if (some(capturingGroups, patternUnion => patternUnion.isPossiblyUndefined!)) {
3039-
patternUnion.add("");
3035+
function getBackreferencePatternUnion(predicate: (disjunction: RegularExpressionDisjunction) => boolean): RegularExpressionPatternContent {
3036+
disjunctionsScopesStack.push(topDisjunctionsScope);
3037+
let patternUnion: RegularExpressionPatternUnion | undefined;
3038+
for (const disjunctionsScope of disjunctionsScopesStack) {
3039+
for (let i = disjunctionsScope.currentAlternativeIndex; i < disjunctionsScope.length; i++) {
3040+
const disjunction = disjunctionsScope[i];
3041+
if (disjunction.patternUnion && !disjunction.isInNegativeAssertion && predicate(disjunction)) {
3042+
for (const pattern of disjunction.patternUnion) {
3043+
(patternUnion ??= new Set() as RegularExpressionPatternUnion).add(pattern);
3044+
}
3045+
if (disjunction.patternUnion.isPossiblyUndefined) {
3046+
(patternUnion ??= new Set() as RegularExpressionPatternUnion).add("");
3047+
}
3048+
}
3049+
}
30403050
}
3041-
return patternUnion;
3051+
disjunctionsScopesStack.pop();
3052+
return patternUnion || "";
30423053
}
30433054

30443055
// AtomEscape ::=
@@ -3055,7 +3066,7 @@ export function createScanner(
30553066
pos++;
30563067
const groupName = scanGroupName(/*isReference*/ true);
30573068
scanExpectedChar(CharacterCodes.greaterThan);
3058-
return groupName ? getBackreferencePatternUnion(disjunctionsScope => disjunctionsScope?.groupSpecifiers?.get(groupName)) : "";
3069+
return groupName ? getBackreferencePatternUnion(disjunction => disjunction.groupName === groupName) : "";
30593070
}
30603071
error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2);
30613072
return getCharacterEquivalents(String.fromCharCode(ch));
@@ -3077,7 +3088,7 @@ export function createScanner(
30773088
scanDigits();
30783089
const groupNumber = +tokenValue;
30793090
decimalEscapes = append(decimalEscapes, { pos: start, end: pos, value: groupNumber });
3080-
return getBackreferencePatternUnion(disjunctionsScope => disjunctionsScope?.groups?.[groupNumber]);
3091+
return getBackreferencePatternUnion(disjunction => disjunction.groupNumber === groupNumber);
30813092
}
30823093
}
30833094

@@ -3149,19 +3160,26 @@ export function createScanner(
31493160
scanIdentifier(codePointChecked(pos), languageVersion);
31503161
if (pos === tokenStart) {
31513162
error(Diagnostics.Expected_a_capturing_group_name);
3163+
return;
31523164
}
3153-
else if (isReference) {
3165+
if (isReference) {
31543166
groupNameReferences = append(groupNameReferences, { pos: tokenStart, end: pos, name: tokenValue });
3155-
return tokenValue;
3156-
}
3157-
else if (topNamedCapturingGroupsScope?.has(tokenValue) || namedCapturingGroupsScopeStack.some(group => group?.has(tokenValue))) {
3158-
error(Diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, pos - tokenStart);
31593167
}
31603168
else {
3161-
topNamedCapturingGroupsScope ??= new Set();
3162-
topNamedCapturingGroupsScope.add(tokenValue);
3163-
return tokenValue;
3169+
disjunctionsScopesStack.push(topDisjunctionsScope);
3170+
if (
3171+
some(disjunctionsScopesStack, disjunctionsScope => {
3172+
for (let i = disjunctionsScope.currentAlternativeIndex; i < disjunctionsScope.length; i++) {
3173+
if (disjunctionsScope[i].groupName === tokenValue) return true;
3174+
}
3175+
return false;
3176+
})
3177+
) {
3178+
error(Diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, pos - tokenStart);
3179+
}
3180+
disjunctionsScopesStack.pop();
31643181
}
3182+
return tokenValue;
31653183
}
31663184

31673185
function isClassContentExit(ch: number) {

src/compiler/types.ts

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2798,9 +2798,17 @@ export const enum RegularExpressionFlags {
27982798
}
27992799

28002800
/** @internal */
2801-
export interface RegularExpressionDisjunctionScope {
2802-
groups?: RegularExpressionPatternUnion[];
2803-
groupSpecifiers?: MultiMap<string, RegularExpressionPatternUnion>;
2801+
export interface RegularExpressionDisjunction {
2802+
patternUnion?: RegularExpressionPatternUnion;
2803+
groupNumber?: number;
2804+
groupName?: string;
2805+
isInNegativeAssertion?: boolean;
2806+
}
2807+
2808+
/** @internal */
2809+
export interface RegularExpressionDisjunctionsScope extends Array<RegularExpressionDisjunction> {
2810+
/** Only the disjunctions at or after this index (those in the alternative currently being scanned) need to be considered */
2811+
currentAlternativeIndex: number;
28042812
}
28052813

28062814
/** @internal */

tests/baselines/reference/regularExpressionLiteralTypes.errors.txt

Lines changed: 4 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,9 @@ regularExpressionLiteralTypes.ts(187,6): error TS1531: '\P' must be followed by
2222
regularExpressionLiteralTypes.ts(187,9): error TS1531: '\P' must be followed by a Unicode property value expression enclosed in braces.
2323
regularExpressionLiteralTypes.ts(188,6): error TS1535: This character cannot be escaped in a regular expression.
2424
regularExpressionLiteralTypes.ts(188,9): error TS1535: This character cannot be escaped in a regular expression.
25-
regularExpressionLiteralTypes.ts(216,20): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
26-
regularExpressionLiteralTypes.ts(218,50): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
27-
regularExpressionLiteralTypes.ts(220,55): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
28-
regularExpressionLiteralTypes.ts(222,51): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
29-
regularExpressionLiteralTypes.ts(224,56): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
30-
regularExpressionLiteralTypes.ts(226,56): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
31-
regularExpressionLiteralTypes.ts(226,98): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
32-
regularExpressionLiteralTypes.ts(226,114): error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
3325

3426

35-
==== regularExpressionLiteralTypes.ts (32 errors) ====
27+
==== regularExpressionLiteralTypes.ts (24 errors) ====
3628
// Basic tests
3729
{
3830
/foo(.*?)bar(.*)?baz/;
@@ -297,32 +289,16 @@ regularExpressionLiteralTypes.ts(226,114): error TS1515: Named capturing groups
297289
/^(?<$1>(?<$2>foo)\k<$1>|(?<$3>bar)\k<$2>)$/;
298290
// This will only match `foo` or `barbar` and not `foobar`, since they aren’t in the same alternative
299291
/(?<$$>foo)|(?<$$>bar)\k<$$>/;
300-
~~
301-
!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
302292
// This will only match `foofoo` or `barbar`
303293
/\k<$$>(?<$$>\k<$$>foo\k<$$>)\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)\k<$$>/;
304-
~~
305-
!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
306294
// Matches `foo`, `bar`, `foofoo` or `barbar`
307295
/\k<$$>?(?<$$>\k<$$>?foo\k<$$>?)\k<$$>?|\k<$$>?(?<$$>\k<$$>?bar\k<$$>?)\k<$$>?/;
308-
~~
309-
!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
310-
// Matches ``, `foofoo` or `barbar`
311-
/\k<$$>(?<$$>\k<$$>foo\k<$$>)?\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)?\k<$$>/;
312-
~~
313-
!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
314296
// Matches ``, `foo`, `bar`, `foofoo` or `barbar`
315297
/\k<$$>?(?<$$>\k<$$>?foo\k<$$>?)?\k<$$>?|\k<$$>?(?<$$>\k<$$>?bar\k<$$>?)?\k<$$>?/;
316-
~~
317-
!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
298+
// Matches ``, `foofoo` or `barbar`, but we don't have the ability to exclude `foo` and `bar`
299+
/\k<$$>(?<$$>\k<$$>foo\k<$$>)?\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)?\k<$$>/;
318300
// All `$$` below match the empty strings
319301
/(?<$$>f<\k<$$>?(?<f>fizz\k<$$>?(?<b>foo)\k<b>?|(?<b>foo)\k<f>?)\k<f>?>|<\k<b>?(?<b>\k<b>?(?<f>bar)|\k<f>?(?<f>bar)\k<$$>?buzz)\k<$$>?>b)/;
320-
~
321-
!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
322-
~
323-
!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
324-
~
325-
!!! error TS1515: Named capturing groups with the same name must be mutually exclusive to each other.
326302

327303
// Assertions
328304
// Positive lookaheads and lookbehinds
@@ -348,6 +324,7 @@ regularExpressionLiteralTypes.ts(226,114): error TS1515: Named capturing groups
348324
/(?=(foo)|(bar))(?<answer>\1|\2|\3|\4)(?=(foo)|(bar))/;
349325
/(?=(foo)|(bar))(?<answer>\1\2\3\4)(?=(foo)|(bar))/;
350326
// Negative lookaheads and lookbehinds
327+
// Capturing groups inside them should always be marked as possibly undefined
351328
/(?!(foo))\1/;
352329
/(?!(foo|bar))\1/;
353330
/(?!(foo)?)\1/;

tests/baselines/reference/regularExpressionLiteralTypes.js

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,10 @@
221221
/\k<$$>(?<$$>\k<$$>foo\k<$$>)\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)\k<$$>/;
222222
// Matches `foo`, `bar`, `foofoo` or `barbar`
223223
/\k<$$>?(?<$$>\k<$$>?foo\k<$$>?)\k<$$>?|\k<$$>?(?<$$>\k<$$>?bar\k<$$>?)\k<$$>?/;
224-
// Matches ``, `foofoo` or `barbar`
225-
/\k<$$>(?<$$>\k<$$>foo\k<$$>)?\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)?\k<$$>/;
226224
// Matches ``, `foo`, `bar`, `foofoo` or `barbar`
227225
/\k<$$>?(?<$$>\k<$$>?foo\k<$$>?)?\k<$$>?|\k<$$>?(?<$$>\k<$$>?bar\k<$$>?)?\k<$$>?/;
226+
// Matches ``, `foofoo` or `barbar`, but we don't have the ability to exclude `foo` and `bar`
227+
/\k<$$>(?<$$>\k<$$>foo\k<$$>)?\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)?\k<$$>/;
228228
// All `$$` below match the empty strings
229229
/(?<$$>f<\k<$$>?(?<f>fizz\k<$$>?(?<b>foo)\k<b>?|(?<b>foo)\k<f>?)\k<f>?>|<\k<b>?(?<b>\k<b>?(?<f>bar)|\k<f>?(?<f>bar)\k<$$>?buzz)\k<$$>?>b)/;
230230

@@ -252,6 +252,7 @@
252252
/(?=(foo)|(bar))(?<answer>\1|\2|\3|\4)(?=(foo)|(bar))/;
253253
/(?=(foo)|(bar))(?<answer>\1\2\3\4)(?=(foo)|(bar))/;
254254
// Negative lookaheads and lookbehinds
255+
// Capturing groups inside them should always be marked as possibly undefined
255256
/(?!(foo))\1/;
256257
/(?!(foo|bar))\1/;
257258
/(?!(foo)?)\1/;
@@ -478,10 +479,10 @@
478479
/\k<$$>(?<$$>\k<$$>foo\k<$$>)\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)\k<$$>/;
479480
// Matches `foo`, `bar`, `foofoo` or `barbar`
480481
/\k<$$>?(?<$$>\k<$$>?foo\k<$$>?)\k<$$>?|\k<$$>?(?<$$>\k<$$>?bar\k<$$>?)\k<$$>?/;
481-
// Matches ``, `foofoo` or `barbar`
482-
/\k<$$>(?<$$>\k<$$>foo\k<$$>)?\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)?\k<$$>/;
483482
// Matches ``, `foo`, `bar`, `foofoo` or `barbar`
484483
/\k<$$>?(?<$$>\k<$$>?foo\k<$$>?)?\k<$$>?|\k<$$>?(?<$$>\k<$$>?bar\k<$$>?)?\k<$$>?/;
484+
// Matches ``, `foofoo` or `barbar`, but we don't have the ability to exclude `foo` and `bar`
485+
/\k<$$>(?<$$>\k<$$>foo\k<$$>)?\k<$$>|\k<$$>(?<$$>\k<$$>bar\k<$$>)?\k<$$>/;
485486
// All `$$` below match the empty strings
486487
/(?<$$>f<\k<$$>?(?<f>fizz\k<$$>?(?<b>foo)\k<b>?|(?<b>foo)\k<f>?)\k<f>?>|<\k<b>?(?<b>\k<b>?(?<f>bar)|\k<f>?(?<f>bar)\k<$$>?buzz)\k<$$>?>b)/;
487488
// Assertions
@@ -508,6 +509,7 @@
508509
/(?=(foo)|(bar))(?<answer>\1|\2|\3|\4)(?=(foo)|(bar))/;
509510
/(?=(foo)|(bar))(?<answer>\1\2\3\4)(?=(foo)|(bar))/;
510511
// Negative lookaheads and lookbehinds
512+
// Capturing groups inside them should always be marked as possibly undefined
511513
/(?!(foo))\1/;
512514
/(?!(foo|bar))\1/;
513515
/(?!(foo)?)\1/;

0 commit comments

Comments
 (0)