diff --git a/.changeset/cyan-rats-attend.md b/.changeset/cyan-rats-attend.md new file mode 100644 index 000000000..a1beff6a7 --- /dev/null +++ b/.changeset/cyan-rats-attend.md @@ -0,0 +1,5 @@ +--- +"eslint-plugin-regexp": minor +--- + +Add support for `v` flag to `regexp/use-ignore-case` diff --git a/lib/rules/use-ignore-case.ts b/lib/rules/use-ignore-case.ts index 537584596..f04c4c604 100644 --- a/lib/rules/use-ignore-case.ts +++ b/lib/rules/use-ignore-case.ts @@ -1,8 +1,10 @@ -import type { CharSet } from "refa" -import { Chars, toCharSet } from "regexp-ast-analysis" +import { CharSet, JS } from "refa" +import { Chars, toUnicodeSet } from "regexp-ast-analysis" import type { CharacterClass, CharacterClassElement, + Node, + StringAlternative, } from "@eslint-community/regexpp/ast" import type { RegExpVisitor } from "@eslint-community/regexpp/visitor" import type { RegExpContext } from "../utils" @@ -18,34 +20,29 @@ import type { } from "../utils/ast-utils/pattern-source" import type { Rule } from "eslint" import { UsageOfPattern } from "../utils/get-usage-of-pattern" +import { cachedFn } from "../utils/util" -// FIXME: TS Error -// @ts-expect-error -- FIXME -const ELEMENT_ORDER: Record = { +type FlatClassElement = CharacterClassElement | StringAlternative + +const ELEMENT_ORDER: Record = { Character: 1, CharacterClassRange: 2, CharacterSet: 3, + CharacterClass: 4, + ExpressionCharacterClass: 5, + ClassStringDisjunction: 6, + StringAlternative: 7, } /** * Finds all character class elements that do not contribute to the whole. */ function findUseless( - elements: readonly CharacterClassElement[], - getCharSet: (e: CharacterClassElement) => CharSet, - other: CharSet, -): Set { - const cache = new Map() - - /** A cached version of `getCharSet` */ - function get(e: CharacterClassElement): CharSet { - let cached = cache.get(e) - if (cached === undefined) { - cached = getCharSet(e) - cache.set(e, cached) - } - return cached - } + elements: readonly FlatClassElement[], + getChars: (e: FlatClassElement) => JS.UnicodeSet, + other: JS.UnicodeSet, +): Set { + const get = cachedFn(getChars) // When searching for useless elements, we want to first // search for useless characters, then useless ranges, and @@ -55,7 +52,7 @@ function findUseless( .reverse() .sort((a, b) => ELEMENT_ORDER[a.type] - ELEMENT_ORDER[b.type]) - const useless = new Set() + const useless = new Set() for (const e of sortedElements) { const cs = get(e) @@ -88,20 +85,51 @@ function without(iter: Iterable, set: ReadonlySet): T[] { } /** - * Removes all the given ranges from the given pattern. - * - * This assumes that all ranges are disjoint + * Removes all the given nodes from the given pattern. */ function removeAll( fixer: Rule.RuleFixer, patternSource: PatternSource, - ranges: readonly PatternRange[], + nodes: readonly Node[], ) { - const sorted = [...ranges].sort((a, b) => b.start - a.start) - let pattern = patternSource.value + // we abuse CharSet to merge adjacent and overlapping ranges + const charSet = CharSet.empty(Number.MAX_SAFE_INTEGER).union( + nodes.map((n) => { + let min = n.start + let max = n.end - 1 + + if (n.type === "StringAlternative") { + const parent = n.parent + if ( + parent.alternatives.length === 1 || + parent.alternatives.every((a) => nodes.includes(a)) + ) { + // we have to remove the whole disjunction + min = parent.start + max = parent.end - 1 + } else { + const isFirst = parent.alternatives.at(0) === n + if (isFirst) { + max++ + } else { + min-- + } + } + } + + return { min, max } + }), + ) + const sorted = charSet.ranges.map( + ({ min, max }): PatternRange => ({ start: min, end: max + 1 }), + ) + let pattern = patternSource.value + let removed = 0 for (const { start, end } of sorted) { - pattern = pattern.slice(0, start) + pattern.slice(end) + pattern = + pattern.slice(0, start - removed) + pattern.slice(end - removed) + removed += end - start } const range = patternSource.getReplaceRange({ @@ -114,6 +142,23 @@ function removeAll( return null } +/** + * Adds the `i` flag to the given flags string. + */ +function getIgnoreCaseFlagsString(flags: string): string { + if (flags.includes("i")) { + return flags + } + + // keep flags sorted + for (let i = 0; i < flags.length; i++) { + if (flags[i] > "i") { + return `${flags.slice(0, i)}i${flags.slice(i)}` + } + } + return `${flags}i` +} + export default createRule("use-ignore-case", { meta: { docs: { @@ -162,27 +207,34 @@ export default createRule("use-ignore-case", { return {} } - const uselessElements: CharacterClassElement[] = [] + const uselessElements: FlatClassElement[] = [] const ccs: CharacterClass[] = [] return { onCharacterClassEnter(ccNode) { - const invariantElement = ccNode.elements.filter( + const elements = ccNode.elements.flatMap( + (e: CharacterClassElement): FlatClassElement[] => { + if (e.type === "ClassStringDisjunction") { + return e.alternatives + } + return [e] + }, + ) + const invariantElement = elements.filter( (e) => !isCaseVariant(e, flags), ) - if (invariantElement.length === ccNode.elements.length) { + if (invariantElement.length === elements.length) { // all elements are case invariant return } - const invariant = Chars.empty(flags).union( - // FIXME: TS Error - // @ts-expect-error -- FIXME - ...invariantElement.map((e) => toCharSet(e, flags)), + const empty = JS.UnicodeSet.empty(Chars.maxChar(flags)) + const invariant = empty.union( + ...invariantElement.map((e) => toUnicodeSet(e, flags)), ) let variantElements = without( - ccNode.elements, + elements, new Set(invariantElement), ) @@ -190,9 +242,7 @@ export default createRule("use-ignore-case", { // the i flag const alwaysUseless = findUseless( variantElements, - // FIXME: TS Error - // @ts-expect-error -- FIXME - (e) => toCharSet(e, flags), + (e) => toUnicodeSet(e, flags), invariant, ) @@ -203,9 +253,7 @@ export default createRule("use-ignore-case", { const iFlags = getIgnoreCaseFlags(flags) const useless = findUseless( variantElements, - // FIXME: TS Error - // @ts-expect-error -- FIXME - (e) => toCharSet(e, iFlags), + (e) => toUnicodeSet(e, iFlags), invariant, ) @@ -236,7 +284,7 @@ export default createRule("use-ignore-case", { } const flagsFix = fixReplaceFlags( - `${flagsString}i`, + getIgnoreCaseFlagsString(flagsString), false, )(fixer) if (!flagsFix) { diff --git a/lib/utils/regexp-ast/case-variation.ts b/lib/utils/regexp-ast/case-variation.ts index 3075d720c..c58500f70 100644 --- a/lib/utils/regexp-ast/case-variation.ts +++ b/lib/utils/regexp-ast/case-variation.ts @@ -8,49 +8,37 @@ import { } from "regexp-ast-analysis" import type { Alternative, - Character, + CharacterClass, CharacterClassElement, - CharacterClassRange, CharacterSet, Element, + ExpressionCharacterClass, Pattern, + StringAlternative, } from "@eslint-community/regexpp/ast" -import { assertNever } from "../util" - -const ignoreCaseFlagsCache = new WeakMap() -const caseSensitiveFlagsCache = new WeakMap() +import { assertNever, cachedFn } from "../util" /** * Returns flags equivalent to the given flags but with the `i` flag set. */ -export function getIgnoreCaseFlags(flags: ReadonlyFlags): ReadonlyFlags { - if (flags.ignoreCase) { - return flags - } - - let cached = ignoreCaseFlagsCache.get(flags) - if (cached === undefined) { - cached = toCache({ ...flags, ignoreCase: true }) - ignoreCaseFlagsCache.set(flags, cached) - } - return cached -} +export const getIgnoreCaseFlags = cachedFn( + (flags: ReadonlyFlags): ReadonlyFlags => { + return flags.ignoreCase + ? flags + : toCache({ ...flags, ignoreCase: true }) + }, +) /** * Returns flags equivalent to the given flags but without the `i` flag set. */ -export function getCaseSensitiveFlags(flags: ReadonlyFlags): ReadonlyFlags { - if (flags.ignoreCase === false) { - return flags - } - - let cached = caseSensitiveFlagsCache.get(flags) - if (cached === undefined) { - cached = toCache({ ...flags, ignoreCase: false }) - caseSensitiveFlagsCache.set(flags, cached) - } - return cached -} +export const getCaseSensitiveFlags = cachedFn( + (flags: ReadonlyFlags): ReadonlyFlags => { + return flags.ignoreCase === false + ? flags + : toCache({ ...flags, ignoreCase: false }) + }, +) /** * Returns whether the given element **will not** behave the same with or @@ -67,18 +55,28 @@ export function getCaseSensitiveFlags(flags: ReadonlyFlags): ReadonlyFlags { * - `wholeCharacterClass: false`: `isCaseVariant(/[a-zA-Z]/) -> true` */ export function isCaseVariant( - element: Element | CharacterClassElement | Alternative | Pattern, + element: + | Element + | CharacterClassElement + | StringAlternative + | Alternative + | Pattern, flags: ReadonlyFlags, wholeCharacterClass = true, ): boolean { - const { unicode = false } = flags + const unicodeLike = Boolean(flags.unicode || flags.unicodeSets) const iSet = getIgnoreCaseFlags(flags) const iUnset = getCaseSensitiveFlags(flags) /** Whether the given character class element is case variant */ function ccElementIsCaseVariant( - e: Character | CharacterClassRange | CharacterSet, + e: + | CharacterClassElement + | CharacterSet + | CharacterClass + | StringAlternative + | ExpressionCharacterClass["expression"], ): boolean { switch (e.type) { case "Character": @@ -92,7 +90,7 @@ export function isCaseVariant( switch (e.kind) { case "word": // \w which is case-variant in Unicode mode - return unicode + return unicodeLike case "property": // just check for equality return !toUnicodeSet(e, iSet).equals( @@ -103,6 +101,30 @@ export function isCaseVariant( return false } + case "CharacterClass": + if (!wholeCharacterClass) { + return e.elements.some(ccElementIsCaseVariant) + } + // just check for equality + return !toUnicodeSet(e, iSet).equals(toUnicodeSet(e, iUnset)) + + case "ExpressionCharacterClass": + return ccElementIsCaseVariant(e.expression) + + case "ClassIntersection": + case "ClassSubtraction": + return !toUnicodeSet(e, iSet).equals(toUnicodeSet(e, iUnset)) + + case "ClassStringDisjunction": + if (!wholeCharacterClass) { + return e.alternatives.some(ccElementIsCaseVariant) + } + // just check for equality + return !toUnicodeSet(e, iSet).equals(toUnicodeSet(e, iUnset)) + + case "StringAlternative": + return e.elements.some(ccElementIsCaseVariant) + default: return assertNever(e) } @@ -115,7 +137,7 @@ export function isCaseVariant( case "Assertion": // \b and \B are defined in terms of \w which is // case-variant in Unicode mode - return unicode && d.kind === "word" + return unicodeLike && d.kind === "word" case "Backreference": // we need to check whether the associated capturing group @@ -134,18 +156,13 @@ export function isCaseVariant( case "Character": case "CharacterClassRange": case "CharacterSet": - return ccElementIsCaseVariant(d) - case "CharacterClass": - if (!wholeCharacterClass) { - // FIXME: TS Error - // @ts-expect-error -- FIXME - return d.elements.some(ccElementIsCaseVariant) - } - // just check for equality - return !toUnicodeSet(d, iSet).equals( - toUnicodeSet(d, iUnset), - ) + case "ExpressionCharacterClass": + case "ClassIntersection": + case "ClassSubtraction": + case "ClassStringDisjunction": + case "StringAlternative": + return ccElementIsCaseVariant(d) default: return false @@ -154,7 +171,10 @@ export function isCaseVariant( (d) => { // don't go into character classes and ranges return ( - d.type !== "CharacterClass" && d.type !== "CharacterClassRange" + d.type !== "CharacterClass" && + d.type !== "CharacterClassRange" && + d.type !== "ExpressionCharacterClass" && + d.type !== "ClassStringDisjunction" ) }, ) diff --git a/lib/utils/util.ts b/lib/utils/util.ts index db6c66d32..9a27cbd63 100644 --- a/lib/utils/util.ts +++ b/lib/utils/util.ts @@ -4,3 +4,22 @@ export function assertNever(value: never): never { throw new Error(`Invalid value: ${value}`) } + +/** + * Returns a cached version of the given function. A `WeakMap` is used internally. + * + * For the cached function to behave correctly, the given function must be pure. + */ +export function cachedFn( + fn: (key: K) => R, +): (key: K) => R { + const cache = new WeakMap() + return (key) => { + let cached = cache.get(key) + if (cached === undefined) { + cached = fn(key) + cache.set(key, cached) + } + return cached + } +} diff --git a/tests/lib/rules/use-ignore-case.ts b/tests/lib/rules/use-ignore-case.ts index 8b01b3c97..54e942168 100644 --- a/tests/lib/rules/use-ignore-case.ts +++ b/tests/lib/rules/use-ignore-case.ts @@ -3,7 +3,7 @@ import rule from "../../../lib/rules/use-ignore-case" const tester = new RuleTester({ parserOptions: { - ecmaVersion: 2020, + ecmaVersion: "latest", sourceType: "module", }, }) @@ -16,6 +16,22 @@ tester.run("use-ignore-case", rule as any, { String.raw`/[aAb]/`, String.raw`/[aaaa]/`, + String.raw`/regexp/u`, + String.raw`/[aA]/iu`, + String.raw`/[aA]a/u`, + String.raw`/[aAb]/u`, + String.raw`/[aaaa]/u`, + String.raw`/\b[aA]/u`, + String.raw`/[a-zA-Z]/u`, + + String.raw`/regexp/v`, + String.raw`/[aA]/iv`, + String.raw`/[aA]a/v`, + String.raw`/[aAb]/v`, + String.raw`/[aaaa]/v`, + String.raw`/\b[aA]/v`, + String.raw`/[a-zA-Z]/v`, + // partial pattern String.raw`/[a-zA-Z]/.source`, ], @@ -27,6 +43,27 @@ tester.run("use-ignore-case", rule as any, { "The character class(es) '[a-zA-Z]' can be simplified using the `i` flag.", ], }, + { + code: String.raw`/[aA][aA][aA][aA][aA]/`, + output: String.raw`/[a][a][a][a][a]/i`, + errors: [ + "The character class(es) '[aA]', '[aA]', '[aA]', '[aA]', '[aA]' can be simplified using the `i` flag.", + ], + }, + { + code: String.raw`/[aA]/u`, + output: String.raw`/[a]/iu`, + errors: [ + "The character class(es) '[aA]' can be simplified using the `i` flag.", + ], + }, + { + code: String.raw`/[aA]/v`, + output: String.raw`/[a]/iv`, + errors: [ + "The character class(es) '[aA]' can be simplified using the `i` flag.", + ], + }, { code: String.raw`/\b0[xX][a-fA-F0-9]+\b/`, output: String.raw`/\b0[x][a-f0-9]+\b/i`, @@ -41,5 +78,12 @@ tester.run("use-ignore-case", rule as any, { "The character class(es) '[a-zA-Z]' can be simplified using the `i` flag.", ], }, + { + code: String.raw`/[\q{a|A}]/v`, + output: String.raw`/[\q{a}]/iv`, + errors: [ + "The character class(es) '[\\q{a|A}]' can be simplified using the `i` flag.", + ], + }, ], })