From 427fe3e9d33d1947df071dcf93cd9ebe6c760663 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Sat, 16 Sep 2023 09:31:54 +0900 Subject: [PATCH 1/2] Add support for v flag to `regexp/no-dupe-characters-character-class` rule --- .../no-dupe-characters-character-class.ts | 108 ++++++++++-------- .../no-dupe-characters-character-class.ts | 23 +++- 2 files changed, 85 insertions(+), 46 deletions(-) diff --git a/lib/rules/no-dupe-characters-character-class.ts b/lib/rules/no-dupe-characters-character-class.ts index 333af9971..2db05af0e 100644 --- a/lib/rules/no-dupe-characters-character-class.ts +++ b/lib/rules/no-dupe-characters-character-class.ts @@ -8,6 +8,9 @@ import type { CharacterClassRange, CharacterSet, AnyCharacterSet, + UnicodeSetsCharacterClass, + ClassStringDisjunction, + ExpressionCharacterClass, } from "@eslint-community/regexpp/ast" import type { RegExpContext } from "../utils" import { @@ -20,8 +23,9 @@ import { import type { CharRange, CharSet } from "refa" import { JS } from "refa" import type { ReadonlyFlags } from "regexp-ast-analysis" -import { toCharSet } from "regexp-ast-analysis" +import { toCharSet, toUnicodeSet } from "regexp-ast-analysis" import { mentionChar } from "../utils/mention" +import { assertNever } from "../utils/util" interface Grouping { duplicates: { @@ -30,7 +34,13 @@ interface Grouping { }[] characters: Character[] characterRanges: CharacterClassRange[] - characterSets: (EscapeCharacterSet | UnicodePropertyCharacterSet)[] + characterSetAndClasses: ( + | EscapeCharacterSet + | UnicodePropertyCharacterSet + | UnicodeSetsCharacterClass + | ClassStringDisjunction + | ExpressionCharacterClass + )[] } /** @@ -44,9 +54,13 @@ function groupElements( const duplicates: Grouping["duplicates"] = [] const characters = new Map() const characterRanges = new Map() - const characterSets = new Map< + const characterSetAndClasses = new Map< string, - EscapeCharacterSet | UnicodePropertyCharacterSet + | EscapeCharacterSet + | UnicodePropertyCharacterSet + | UnicodeSetsCharacterClass + | ClassStringDisjunction + | ExpressionCharacterClass >() /** @@ -68,19 +82,24 @@ function groupElements( } for (const e of elements) { - // FIXME: TS Error - // @ts-expect-error -- FIXME - const charSet = toCharSet(e, flags) - if (e.type === "Character") { + const charSet = toCharSet(e, flags) const key = charSet.ranges[0].min addToGroup(characters, key, e) } else if (e.type === "CharacterClassRange") { + const charSet = toCharSet(e, flags) const key = buildRangeKey(charSet) addToGroup(characterRanges, key, e) - } else if (e.type === "CharacterSet") { + } else if ( + e.type === "CharacterSet" || + e.type === "CharacterClass" || + e.type === "ClassStringDisjunction" || + e.type === "ExpressionCharacterClass" + ) { const key = e.raw - addToGroup(characterSets, key, e) + addToGroup(characterSetAndClasses, key, e) + } else { + assertNever(e) } } @@ -88,7 +107,7 @@ function groupElements( duplicates, characters: [...characters.values()], characterRanges: [...characterRanges.values()], - characterSets: [...characterSets.values()], + characterSetAndClasses: [...characterSetAndClasses.values()], } function buildRangeKey(rangeCharSet: CharSet) { @@ -197,7 +216,10 @@ export default createRule("no-dupe-characters-character-class", { subsetElement: CharacterClassElement, element: | Exclude - | CharacterClassRange, + | CharacterClassRange + | UnicodeSetsCharacterClass + | ClassStringDisjunction + | ExpressionCharacterClass, ) { const { node, getRegexpLocation } = regexpContext @@ -254,9 +276,12 @@ export default createRule("no-dupe-characters-character-class", { duplicates, characters, characterRanges, - characterSets, + characterSetAndClasses, } = groupElements(ccNode.elements, flags) - const rangesAndSets = [...characterRanges, ...characterSets] + const elementsOtherThanCharacter = [ + ...characterRanges, + ...characterSetAndClasses, + ] // keep track of all reported subset elements const subsets = new Set() @@ -269,10 +294,10 @@ export default createRule("no-dupe-characters-character-class", { // report characters that are already matched by some range or set for (const char of characters) { - for (const other of rangesAndSets) { - // FIXME: TS Error - // @ts-expect-error -- FIXME - if (toCharSet(other, flags).has(char.value)) { + for (const other of elementsOtherThanCharacter) { + if ( + toUnicodeSet(other, flags).chars.has(char.value) + ) { reportSubset(regexpContext, char, other) subsets.add(char) break @@ -281,19 +306,15 @@ export default createRule("no-dupe-characters-character-class", { } // report character ranges and sets that are already matched by some range or set - for (const element of rangesAndSets) { - for (const other of rangesAndSets) { + for (const element of elementsOtherThanCharacter) { + for (const other of elementsOtherThanCharacter) { if (element === other || subsets.has(other)) { continue } if ( - // FIXME: TS Error - // @ts-expect-error -- FIXME - toCharSet(element, flags).isSubsetOf( - // FIXME: TS Error - // @ts-expect-error -- FIXME - toCharSet(other, flags), + toUnicodeSet(element, flags).isSubsetOf( + toUnicodeSet(other, flags), ) ) { reportSubset(regexpContext, element, other) @@ -305,34 +326,28 @@ export default createRule("no-dupe-characters-character-class", { // character ranges and sets might be a subset of a combination of other elements // e.g. `b-d` is a subset of `a-cd-f` - const characterTotal = toCharSet( + const characterTotal = toUnicodeSet( characters.filter((c) => !subsets.has(c)), flags, ) - for (const element of rangesAndSets) { + for (const element of elementsOtherThanCharacter) { if (subsets.has(element)) { continue } const totalOthers = characterTotal.union( - ...rangesAndSets + ...elementsOtherThanCharacter .filter((e) => !subsets.has(e) && e !== element) - // FIXME: TS Error - // @ts-expect-error -- FIXME - .map((e) => toCharSet(e, flags)), + .map((e) => toUnicodeSet(e, flags)), ) - // FIXME: TS Error - // @ts-expect-error -- FIXME - const elementCharSet = toCharSet(element, flags) + const elementCharSet = toUnicodeSet(element, flags) if (elementCharSet.isSubsetOf(totalOthers)) { const superSetElements = ccNode.elements .filter((e) => !subsets.has(e) && e !== element) .filter( (e) => - // FIXME: TS Error - // @ts-expect-error -- FIXME - !toCharSet(e, flags).isDisjointWith( + !toUnicodeSet(e, flags).isDisjointWith( elementCharSet, ), ) @@ -354,18 +369,20 @@ export default createRule("no-dupe-characters-character-class", { continue } - for (let j = i + 1; j < rangesAndSets.length; j++) { - const other = rangesAndSets[j] + for ( + let j = i + 1; + j < elementsOtherThanCharacter.length; + j++ + ) { + const other = elementsOtherThanCharacter[j] if (range === other || subsets.has(other)) { continue } - const intersection = toCharSet( + const intersection = toUnicodeSet( range, flags, - // FIXME: TS Error - // @ts-expect-error -- FIXME - ).intersect(toCharSet(other, flags)) + ).intersect(toUnicodeSet(other, flags)) if (intersection.isEmpty) { continue } @@ -375,7 +392,8 @@ export default createRule("no-dupe-characters-character-class", { // character range. // there is no point in reporting overlaps that can't be fixed. const interestingRanges = - intersection.ranges.filter( + // Range and string never intersect, so we can only check `chars`. + intersection.chars.ranges.filter( (r) => inRange(r, range.min.value) || inRange(r, range.max.value), diff --git a/tests/lib/rules/no-dupe-characters-character-class.ts b/tests/lib/rules/no-dupe-characters-character-class.ts index fb4854e08..43d634cc4 100644 --- a/tests/lib/rules/no-dupe-characters-character-class.ts +++ b/tests/lib/rules/no-dupe-characters-character-class.ts @@ -3,7 +3,7 @@ import rule from "../../../lib/rules/no-dupe-characters-character-class" const tester = new RuleTester({ parserOptions: { - ecmaVersion: 2020, + ecmaVersion: "latest", sourceType: "module", }, }) @@ -21,6 +21,7 @@ tester.run("no-dupe-characters-character-class", rule as any, { "/[\\w\\p{L}]/u", "/\\p{ASCII}abc/u", String.raw`/[\u1fff-\u2020\s]/`, + String.raw`/[\q{a}\q{ab}\q{abc}[\w--[ab]][\w&&b]]/v`, // error "var r = new RegExp('[\\\\wA-Za-z0-9_][invalid');", ], @@ -685,5 +686,25 @@ tester.run("no-dupe-characters-character-class", rule as any, { output: null, errors: 1, }, + // v flags + { + code: String.raw`/[\q{a}aa-c[\w--b][\w&&a]]/v`, + output: String.raw`/[aa-c[\w--b]]/v`, + errors: [ + "'\\q{a}' is already included in 'a-c' (U+0061 - U+0063).", + "'a' (U+0061) is already included in 'a-c' (U+0061 - U+0063).", + "Unexpected overlap of 'a-c' (U+0061 - U+0063) and '[\\w--b]' was found '[ac]'.", + "'[\\w&&a]' is already included in 'a-c' (U+0061 - U+0063).", + ], + }, + { + code: String.raw`/[\q{abc}\q{abc|ab}[\q{abc}--b][\q{abc}&&\q{abc|ab}]]/v`, + output: String.raw`/[\q{abc|ab}[\q{abc}&&\q{abc|ab}]]/v`, + errors: [ + "'\\q{abc}' is already included in '\\q{abc|ab}'.", + "'[\\q{abc}--b]' is already included in '\\q{abc|ab}'.", + "'[\\q{abc}&&\\q{abc|ab}]' is already included in '\\q{abc|ab}'.", + ], + }, ], }) From 83b1d93af215782724d0c16a98c22142e6cdbd8e Mon Sep 17 00:00:00 2001 From: Yosuke Ota Date: Sat, 16 Sep 2023 09:32:49 +0900 Subject: [PATCH 2/2] Create friendly-walls-reply.md --- .changeset/friendly-walls-reply.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/friendly-walls-reply.md diff --git a/.changeset/friendly-walls-reply.md b/.changeset/friendly-walls-reply.md new file mode 100644 index 000000000..d2eeb29b0 --- /dev/null +++ b/.changeset/friendly-walls-reply.md @@ -0,0 +1,5 @@ +--- +"eslint-plugin-regexp": minor +--- + +Add support for v flag to `regexp/no-dupe-characters-character-class` rule