From 7ce918231946da27fcc0990f9bd3c1a142275884 Mon Sep 17 00:00:00 2001 From: Marin Atanasov <8436925+tyxla@users.noreply.github.com> Date: Fri, 19 Apr 2024 13:03:37 +0300 Subject: [PATCH 1/6] ComboboxControl: Simplify string normalization --- packages/components/src/utils/strings.ts | 52 ++-------------- packages/components/src/utils/test/strings.js | 62 +++++++++++++++++++ 2 files changed, 67 insertions(+), 47 deletions(-) diff --git a/packages/components/src/utils/strings.ts b/packages/components/src/utils/strings.ts index bb43a5e53a4050..8f002e78e4f2c8 100644 --- a/packages/components/src/utils/strings.ts +++ b/packages/components/src/utils/strings.ts @@ -6,64 +6,22 @@ import { paramCase } from 'change-case'; const ALL_UNICODE_DASH_CHARACTERS = new RegExp( `[${ [ - // - (hyphen-minus) - '\u002d', // ~ (tilde) '\u007e', // ­ (soft hyphen) '\u00ad', - // ֊ (armenian hyphen) - '\u058a', - // ־ (hebrew punctuation maqaf) - '\u05be', - // ᐀ (canadian syllabics hyphen) - '\u1400', - // ᠆ (mongolian todo soft hyphen) - '\u1806', - // ‐ (hyphen) - '\u2010', - // non-breaking hyphen) - '\u2011', - // ‒ (figure dash) - '\u2012', - // – (en dash) - '\u2013', - // — (em dash) - '\u2014', - // ― (horizontal bar) - '\u2015', // ⁓ (swung dash) '\u2053', - // superscript minus) + // ⁻ (superscript minus) '\u207b', - // subscript minus) + // ₋ (subscript minus) '\u208b', // − (minus sign) '\u2212', - // ⸗ (double oblique hyphen) - '\u2e17', - // ⸺ (two-em dash) - '\u2e3a', - // ⸻ (three-em dash) - '\u2e3b', - // 〜 (wave dash) - '\u301c', - // 〰 (wavy dash) - '\u3030', - // ゠ (katakana-hiragana double hyphen) - '\u30a0', - // ︱ (presentation form for vertical em dash) - '\ufe31', - // ︲ (presentation form for vertical en dash) - '\ufe32', - // ﹘ (small em dash) - '\ufe58', - // ﹣ (small hyphen-minus) - '\ufe63', - // - (fullwidth hyphen-minus) - '\uff0d', + // any other Unicode dash character + '\\p{Pd}', ].join( '' ) }]`, - 'g' + 'gu' ); export const normalizeTextString = ( 
value: string ): string => { diff --git a/packages/components/src/utils/test/strings.js b/packages/components/src/utils/test/strings.js index 2c7d9641260f5f..68c881250fd109 100644 --- a/packages/components/src/utils/test/strings.js +++ b/packages/components/src/utils/test/strings.js @@ -106,5 +106,67 @@ describe( 'normalizeTextString', () => { expect( normalizeTextString( 'foo⸻bar' ) ).toBe( 'foo-bar' ); expect( normalizeTextString( 'foo゠bar' ) ).toBe( 'foo-bar' ); expect( normalizeTextString( 'foo-bar' ) ).toBe( 'foo-bar' ); + + const dashCharacters = [ + // - (hyphen-minus) + '\u002d', + // ~ (tilde) + '\u007e', + // ­ (soft hyphen) + '\u00ad', + // ֊ (armenian hyphen) + '\u058a', + // ־ (hebrew punctuation maqaf) + '\u05be', + // ᐀ (canadian syllabics hyphen) + '\u1400', + // ᠆ (mongolian todo soft hyphen) + '\u1806', + // ‐ (hyphen) + '\u2010', + // non-breaking hyphen) + '\u2011', + // ‒ (figure dash) + '\u2012', + // – (en dash) + '\u2013', + // — (em dash) + '\u2014', + // ― (horizontal bar) + '\u2015', + // ⁓ (swung dash) + '\u2053', + // superscript minus) + '\u207b', + // subscript minus) + '\u208b', + // − (minus sign) + '\u2212', + // ⸗ (double oblique hyphen) + '\u2e17', + // ⸺ (two-em dash) + '\u2e3a', + // ⸻ (three-em dash) + '\u2e3b', + // 〜 (wave dash) + '\u301c', + // 〰 (wavy dash) + '\u3030', + // ゠ (katakana-hiragana double hyphen) + '\u30a0', + // ︱ (presentation form for vertical em dash) + '\ufe31', + // ︲ (presentation form for vertical en dash) + '\ufe32', + // ﹘ (small em dash) + '\ufe58', + // ﹣ (small hyphen-minus) + '\ufe63', + // - (fullwidth hyphen-minus) + '\uff0d', + ]; + expect( normalizeTextString( dashCharacters.join( '' ) ) ).toBe( + '-'.repeat( dashCharacters.length ) + ); } ); } ); From 484a3d0eec3e48aa1eb772cfeb49d00b90d5adda Mon Sep 17 00:00:00 2001 From: Marin Atanasov <8436925+tyxla@users.noreply.github.com> Date: Fri, 19 Apr 2024 13:21:42 +0300 Subject: [PATCH 2/6] CHANGELOG --- packages/components/CHANGELOG.md | 1 + 1 
file changed, 1 insertion(+) diff --git a/packages/components/CHANGELOG.md b/packages/components/CHANGELOG.md index d4ae5427840970..404c3a8378e39d 100644 --- a/packages/components/CHANGELOG.md +++ b/packages/components/CHANGELOG.md @@ -21,6 +21,7 @@ - `Navigator`: Navigation to the active path doesn't create a new location history ([#60561](https://github.com/WordPress/gutenberg/pull/60561)) - `FormToggle`: Forwards ref to input ([#60234](https://github.com/WordPress/gutenberg/pull/60234)). - `ToggleControl`: Forwards ref to FormToggle ([#60234](https://github.com/WordPress/gutenberg/pull/60234)). +- `ComboboxControl`: Simplify string normalization ([#60893](https://github.com/WordPress/gutenberg/pull/60893)). ### Bug Fix From 7fd5a36065c2f2ed2ed2b217f0a437c6bc83dae5 Mon Sep 17 00:00:00 2001 From: Marin Atanasov <8436925+tyxla@users.noreply.github.com> Date: Mon, 22 Apr 2024 16:38:26 +0300 Subject: [PATCH 3/6] Update CHANGELOG --- packages/components/CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/components/CHANGELOG.md b/packages/components/CHANGELOG.md index 404c3a8378e39d..742670d6777f72 100644 --- a/packages/components/CHANGELOG.md +++ b/packages/components/CHANGELOG.md @@ -6,6 +6,10 @@ - `SlotFill`: fixed missing `getServerSnapshot` parameter in slot map ([#60943](https://github.com/WordPress/gutenberg/pull/60943)). +### Enhancements + +- `ComboboxControl`: Simplify string normalization ([#60893](https://github.com/WordPress/gutenberg/pull/60893)). + ## 27.4.0 (2024-04-19) ### Deprecation @@ -21,7 +25,6 @@ - `Navigator`: Navigation to the active path doesn't create a new location history ([#60561](https://github.com/WordPress/gutenberg/pull/60561)) - `FormToggle`: Forwards ref to input ([#60234](https://github.com/WordPress/gutenberg/pull/60234)). - `ToggleControl`: Forwards ref to FormToggle ([#60234](https://github.com/WordPress/gutenberg/pull/60234)). 
-- `ComboboxControl`: Simplify string normalization ([#60893](https://github.com/WordPress/gutenberg/pull/60893)). ### Bug Fix From b56d28bc9b028826435d582453f8c1c54edcc346 Mon Sep 17 00:00:00 2001 From: Marin Atanasov <8436925+tyxla@users.noreply.github.com> Date: Mon, 22 Apr 2024 17:20:59 +0300 Subject: [PATCH 4/6] Try literal notation --- packages/components/src/utils/strings.ts | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/packages/components/src/utils/strings.ts b/packages/components/src/utils/strings.ts index 8f002e78e4f2c8..c0f22800631d6a 100644 --- a/packages/components/src/utils/strings.ts +++ b/packages/components/src/utils/strings.ts @@ -5,23 +5,7 @@ import removeAccents from 'remove-accents'; import { paramCase } from 'change-case'; const ALL_UNICODE_DASH_CHARACTERS = new RegExp( - `[${ [ - // ~ (tilde) - '\u007e', - // ­ (soft hyphen) - '\u00ad', - // ⁓ (swung dash) - '\u2053', - // ⁻ (superscript minus) - '\u207b', - // ₋ (subscript minus) - '\u208b', - // − (minus sign) - '\u2212', - // any other Unicode dash character - '\\p{Pd}', - ].join( '' ) }]`, - 'gu' + /[\u007e\u00ad\u2053\u207b\u208b\u2212\p{Pd}]/gu ); export const normalizeTextString = ( value: string ): string => { From 893f934dc9dba0db53f2de30efe7858bed4a8ee1 Mon Sep 17 00:00:00 2001 From: Marin Atanasov <8436925+tyxla@users.noreply.github.com> Date: Mon, 22 Apr 2024 17:37:50 +0300 Subject: [PATCH 5/6] Add inline comment to clarify regex --- packages/components/src/utils/strings.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/components/src/utils/strings.ts b/packages/components/src/utils/strings.ts index c0f22800631d6a..e15886da61f5a6 100644 --- a/packages/components/src/utils/strings.ts +++ b/packages/components/src/utils/strings.ts @@ -4,6 +4,16 @@ import removeAccents from 'remove-accents'; import { paramCase } from 'change-case'; +/** + * All unicode characters that we consider "dash-like": + * - `\u007e`: ~ (tilde) + * 
- `\u00ad`: ­ (soft hyphen) + * - `\u2053`: ⁓ (swung dash) + * - `\u207b`: ⁻ (superscript minus) + * - `\u208b`: ₋ (subscript minus) + * - `\u2212`: − (minus sign) + * - `\p{Pd}`: any other character in the Unicode "Dash Punctuation" (Pd) general category + */ const ALL_UNICODE_DASH_CHARACTERS = new RegExp( /[\u007e\u00ad\u2053\u207b\u208b\u2212\p{Pd}]/gu ); From 88f608923b7c4d0a595e802b1a9b0d48b70e35fb Mon Sep 17 00:00:00 2001 From: Marin Atanasov <8436925+tyxla@users.noreply.github.com> Date: Mon, 22 Apr 2024 17:38:19 +0300 Subject: [PATCH 6/6] Fix test comments --- packages/components/src/utils/test/strings.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/components/src/utils/test/strings.js b/packages/components/src/utils/test/strings.js index 68c881250fd109..7d03075897318f 100644 --- a/packages/components/src/utils/test/strings.js +++ b/packages/components/src/utils/test/strings.js @@ -136,9 +136,9 @@ describe( 'normalizeTextString', () => { '\u2015', // ⁓ (swung dash) '\u2053', - // superscript minus) + // ⁻ (superscript minus) '\u207b', - // subscript minus) + // ₋ (subscript minus) '\u208b', // − (minus sign) '\u2212',