From 69adf95498bab1e68de12a05da309b1cb4a447ea Mon Sep 17 00:00:00 2001 From: Marin Atanasov <8436925+tyxla@users.noreply.github.com> Date: Tue, 23 Apr 2024 14:29:47 +0300 Subject: [PATCH] ComboboxControl: Simplify string normalization (#60893) * ComboboxControl: Simplify string normalization * CHANGELOG * Update CHANGELOG * Try literal notation * Add inline comment to clarify regex * Fix test comments Co-authored-by: tyxla Co-authored-by: Mamaduka --- packages/components/CHANGELOG.md | 4 ++ packages/components/src/utils/strings.ts | 70 +++---------------- packages/components/src/utils/test/strings.js | 62 ++++++++++++++++ 3 files changed, 77 insertions(+), 59 deletions(-) diff --git a/packages/components/CHANGELOG.md b/packages/components/CHANGELOG.md index 0df9c6c9df842..fe370102abfff 100644 --- a/packages/components/CHANGELOG.md +++ b/packages/components/CHANGELOG.md @@ -10,6 +10,10 @@ - `SlotFill`: fixed missing `getServerSnapshot` parameter in slot map ([#60943](https://github.com/WordPress/gutenberg/pull/60943)). +### Enhancements + +- `ComboboxControl`: Simplify string normalization ([#60893](https://github.com/WordPress/gutenberg/pull/60893)). 
+ ## 27.4.0 (2024-04-19) ### Deprecation diff --git a/packages/components/src/utils/strings.ts b/packages/components/src/utils/strings.ts index bb43a5e53a405..e15886da61f5a 100644 --- a/packages/components/src/utils/strings.ts +++ b/packages/components/src/utils/strings.ts @@ -4,66 +4,18 @@ import removeAccents from 'remove-accents'; import { paramCase } from 'change-case'; +/** + * All unicode characters that we consider "dash-like": + * - `\u007e`: ~ (tilde) + * - `\u00ad`: ­ (soft hyphen) + * - `\u2053`: ⁓ (swung dash) + * - `\u207b`: ⁻ (superscript minus) + * - `\u208b`: ₋ (subscript minus) + * - `\u2212`: − (minus sign) + * - `\p{Pd}`: any other Unicode dash character + */ const ALL_UNICODE_DASH_CHARACTERS = new RegExp( - `[${ [ - // - (hyphen-minus) - '\u002d', - // ~ (tilde) - '\u007e', - // ­ (soft hyphen) - '\u00ad', - // ֊ (armenian hyphen) - '\u058a', - // ־ (hebrew punctuation maqaf) - '\u05be', - // ᐀ (canadian syllabics hyphen) - '\u1400', - // ᠆ (mongolian todo soft hyphen) - '\u1806', - // ‐ (hyphen) - '\u2010', - // non-breaking hyphen) - '\u2011', - // ‒ (figure dash) - '\u2012', - // – (en dash) - '\u2013', - // — (em dash) - '\u2014', - // ― (horizontal bar) - '\u2015', - // ⁓ (swung dash) - '\u2053', - // superscript minus) - '\u207b', - // subscript minus) - '\u208b', - // − (minus sign) - '\u2212', - // ⸗ (double oblique hyphen) - '\u2e17', - // ⸺ (two-em dash) - '\u2e3a', - // ⸻ (three-em dash) - '\u2e3b', - // 〜 (wave dash) - '\u301c', - // 〰 (wavy dash) - '\u3030', - // ゠ (katakana-hiragana double hyphen) - '\u30a0', - // ︱ (presentation form for vertical em dash) - '\ufe31', - // ︲ (presentation form for vertical en dash) - '\ufe32', - // ﹘ (small em dash) - '\ufe58', - // ﹣ (small hyphen-minus) - '\ufe63', - // - (fullwidth hyphen-minus) - '\uff0d', - ].join( '' ) }]`, - 'g' + /[\u007e\u00ad\u2053\u207b\u208b\u2212\p{Pd}]/gu ); export const normalizeTextString = ( value: string ): string => { diff --git 
a/packages/components/src/utils/test/strings.js b/packages/components/src/utils/test/strings.js index 2c7d9641260f5..7d03075897318 100644 --- a/packages/components/src/utils/test/strings.js +++ b/packages/components/src/utils/test/strings.js @@ -106,5 +106,67 @@ describe( 'normalizeTextString', () => { expect( normalizeTextString( 'foo⸻bar' ) ).toBe( 'foo-bar' ); expect( normalizeTextString( 'foo゠bar' ) ).toBe( 'foo-bar' ); expect( normalizeTextString( 'foo-bar' ) ).toBe( 'foo-bar' ); + + const dashCharacters = [ + // - (hyphen-minus) + '\u002d', + // ~ (tilde) + '\u007e', + // ­ (soft hyphen) + '\u00ad', + // ֊ (armenian hyphen) + '\u058a', + // ־ (hebrew punctuation maqaf) + '\u05be', + // ᐀ (canadian syllabics hyphen) + '\u1400', + // ᠆ (mongolian todo soft hyphen) + '\u1806', + // ‐ (hyphen) + '\u2010', + // ‑ (non-breaking hyphen) + '\u2011', + // ‒ (figure dash) + '\u2012', + // – (en dash) + '\u2013', + // — (em dash) + '\u2014', + // ― (horizontal bar) + '\u2015', + // ⁓ (swung dash) + '\u2053', + // ⁻ (superscript minus) + '\u207b', + // ₋ (subscript minus) + '\u208b', + // − (minus sign) + '\u2212', + // ⸗ (double oblique hyphen) + '\u2e17', + // ⸺ (two-em dash) + '\u2e3a', + // ⸻ (three-em dash) + '\u2e3b', + // 〜 (wave dash) + '\u301c', + // 〰 (wavy dash) + '\u3030', + // ゠ (katakana-hiragana double hyphen) + '\u30a0', + // ︱ (presentation form for vertical em dash) + '\ufe31', + // ︲ (presentation form for vertical en dash) + '\ufe32', + // ﹘ (small em dash) + '\ufe58', + // ﹣ (small hyphen-minus) + '\ufe63', + // - (fullwidth hyphen-minus) + '\uff0d', + ]; + expect( normalizeTextString( dashCharacters.join( '' ) ) ).toBe( + '-'.repeat( dashCharacters.length ) + ); } ); } );