Skip to content

Commit

Permalink
ComboboxControl: Simplify string normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
tyxla committed Apr 19, 2024
1 parent 4b08cf1 commit 30031db
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 47 deletions.
52 changes: 5 additions & 47 deletions packages/components/src/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,64 +6,22 @@ import { paramCase } from 'change-case';

const ALL_UNICODE_DASH_CHARACTERS = new RegExp(
`[${ [
// - (hyphen-minus)
'\u002d',
// ~ (tilde)
'\u007e',
// ­ (soft hyphen)
'\u00ad',
// ֊ (armenian hyphen)
'\u058a',
// ־ (hebrew punctuation maqaf)
'\u05be',
// ᐀ (canadian syllabics hyphen)
'\u1400',
// ᠆ (mongolian todo soft hyphen)
'\u1806',
// ‐ (hyphen)
'\u2010',
// ‑ (non-breaking hyphen)
'\u2011',
// ‒ (figure dash)
'\u2012',
// – (en dash)
'\u2013',
// — (em dash)
'\u2014',
// ― (horizontal bar)
'\u2015',
// ⁓ (swung dash)
'\u2053',
// superscript minus)
// ⁻ (superscript minus)
'\u207b',
// subscript minus)
// ₋ (subscript minus)
'\u208b',
// − (minus sign)
'\u2212',
// ⸗ (double oblique hyphen)
'\u2e17',
// ⸺ (two-em dash)
'\u2e3a',
// ⸻ (three-em dash)
'\u2e3b',
// 〜 (wave dash)
'\u301c',
// 〰 (wavy dash)
'\u3030',
// ゠ (katakana-hiragana double hyphen)
'\u30a0',
// ︱ (presentation form for vertical em dash)
'\ufe31',
// ︲ (presentation form for vertical en dash)
'\ufe32',
// ﹘ (small em dash)
'\ufe58',
// ﹣ (small hyphen-minus)
'\ufe63',
// － (fullwidth hyphen-minus)
'\uff0d',
// any other Unicode dash character
'\\p{Pd}',
].join( '' ) }]`,
'g'
'gu'
);

export const normalizeTextString = ( value: string ): string => {
Expand Down
62 changes: 62 additions & 0 deletions packages/components/src/utils/test/strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,67 @@ describe( 'normalizeTextString', () => {
expect( normalizeTextString( 'foo⸻bar' ) ).toBe( 'foo-bar' );
expect( normalizeTextString( 'foo゠bar' ) ).toBe( 'foo-bar' );
expect( normalizeTextString( 'foo-bar' ) ).toBe( 'foo-bar' );

const dashCharacters = [
// - (hyphen-minus)
'\u002d',
// ~ (tilde)
'\u007e',
// ­ (soft hyphen)
'\u00ad',
// ֊ (armenian hyphen)
'\u058a',
// ־ (hebrew punctuation maqaf)
'\u05be',
// ᐀ (canadian syllabics hyphen)
'\u1400',
// ᠆ (mongolian todo soft hyphen)
'\u1806',
// ‐ (hyphen)
'\u2010',
// ‑ (non-breaking hyphen)
'\u2011',
// ‒ (figure dash)
'\u2012',
// – (en dash)
'\u2013',
// — (em dash)
'\u2014',
// ― (horizontal bar)
'\u2015',
// ⁓ (swung dash)
'\u2053',
// ⁻ (superscript minus)
'\u207b',
// ₋ (subscript minus)
'\u208b',
// − (minus sign)
'\u2212',
// ⸗ (double oblique hyphen)
'\u2e17',
// ⸺ (two-em dash)
'\u2e3a',
// ⸻ (three-em dash)
'\u2e3b',
// 〜 (wave dash)
'\u301c',
// 〰 (wavy dash)
'\u3030',
// ゠ (katakana-hiragana double hyphen)
'\u30a0',
// ︱ (presentation form for vertical em dash)
'\ufe31',
// ︲ (presentation form for vertical en dash)
'\ufe32',
// ﹘ (small em dash)
'\ufe58',
// ﹣ (small hyphen-minus)
'\ufe63',
// － (fullwidth hyphen-minus)
'\uff0d',
];
expect( normalizeTextString( dashCharacters.join( '' ) ) ).toBe(
'-'.repeat( dashCharacters.length )
);
} );
} );

0 comments on commit 30031db

Please sign in to comment.