diff --git a/index.js b/index.js index d9861fa..8a24115 100644 --- a/index.js +++ b/index.js @@ -4,6 +4,8 @@ import emojiRegex from 'emoji-regex'; const segmenter = new Intl.Segmenter(); +const defaultIgnorableCodePointRegex = /^\p{Default_Ignorable_Code_Point}$/u; + export default function stringWidth(string, options = {}) { if (typeof string !== 'string' || string.length === 0) { return 0; @@ -33,8 +35,37 @@ export default function stringWidth(string, options = {}) { continue; } + // Ignore zero-width characters + if ( + (codePoint >= 0x20_0B && codePoint <= 0x20_0F) // Zero-width space, non-joiner, joiner, left-to-right mark, right-to-left mark + || codePoint === 0xFE_FF // Zero-width no-break space + ) { + continue; + } + // Ignore combining characters - if (codePoint >= 0x3_00 && codePoint <= 0x3_6F) { + if ( + (codePoint >= 0x3_00 && codePoint <= 0x3_6F) // Combining diacritical marks + || (codePoint >= 0x1A_B0 && codePoint <= 0x1A_FF) // Combining diacritical marks extended + || (codePoint >= 0x1D_C0 && codePoint <= 0x1D_FF) // Combining diacritical marks supplement + || (codePoint >= 0x20_D0 && codePoint <= 0x20_FF) // Combining diacritical marks for symbols + || (codePoint >= 0xFE_20 && codePoint <= 0xFE_2F) // Combining half marks + ) { + continue; + } + + // Ignore lone surrogates (a valid surrogate pair is a single code point outside this range) + if (codePoint >= 0xD8_00 && codePoint <= 0xDF_FF) { + continue; + } + + // Ignore variation selectors + if (codePoint >= 0xFE_00 && codePoint <= 0xFE_0F) { + continue; + } + + // This covers some of the above cases, but we still keep the explicit checks for performance reasons. 
+ if (defaultIgnorableCodePointRegex.test(character)) { continue; } diff --git a/test.js b/test.js index e61dc94..d62b133 100644 --- a/test.js +++ b/test.js @@ -37,6 +37,10 @@ test('ignores control characters', t => { test('handles combining characters', t => { t.is(stringWidth('x\u0300'), 1); + t.is(stringWidth('\u0300\u0301'), 0); + t.is(stringWidth('e\u0301e'), 2); + t.is(stringWidth('x\u036F'), 1); + t.is(stringWidth('\u036F\u036F'), 0); }); test('handles ZWJ characters', t => { @@ -45,3 +49,52 @@ test('handles ZWJ characters', t => { t.is(stringWidth('👩‍👩‍👦‍👦'), 2); t.is(stringWidth('👨‍❤️‍💋‍👨'), 2); }); + +test('handles zero-width characters', t => { + t.is(stringWidth('\u200B'), 0); + t.is(stringWidth('x\u200Bx'), 2); + t.is(stringWidth('\u200C'), 0); + t.is(stringWidth('x\u200Cx'), 2); + t.is(stringWidth('\u200D'), 0); + t.is(stringWidth('x\u200Dx'), 2); + t.is(stringWidth('\uFEFF'), 0); + t.is(stringWidth('x\uFEFFx'), 2); +}); + +test('handles surrogate pairs', t => { + t.is(stringWidth('\uD83D\uDE00'), 2); // 😀 + t.is(stringWidth('A\uD83D\uDE00B'), 4); +}); + +test('handles variation selectors', t => { + t.is(stringWidth('\u{1F1E6}\uFE0F'), 1); // Regional indicator symbol A with variation selector + t.is(stringWidth('A\uFE0F'), 1); + t.is(stringWidth('\uFE0F'), 0); +}); + +test('handles edge cases', t => { + t.is(stringWidth(''), 0); + t.is(stringWidth('\u200B\u200B'), 0); + t.is(stringWidth('x\u200Bx\u200B'), 2); + t.is(stringWidth('x\u0300x\u0300'), 2); + t.is(stringWidth('\uD83D\uDE00\uFE0F'), 2); // 😀 with variation selector + t.is(stringWidth('\uD83D\uDC69\u200D\uD83C\uDF93'), 2); // 👩‍🎓 + t.is(stringWidth('x\u1AB0x\u1AB0'), 2); // Combining diacritical marks extended + t.is(stringWidth('x\u1DC0x\u1DC0'), 2); // Combining diacritical marks supplement + t.is(stringWidth('x\u20D0x\u20D0'), 2); // Combining diacritical marks for symbols + t.is(stringWidth('x\uFE20x\uFE20'), 2); // Combining half marks +}); + +test('ignores default ignorable code 
points', t => { + t.is(stringWidth('\u2060'), 0); // Word joiner + t.is(stringWidth('\u2061'), 0); // Function application + t.is(stringWidth('\u2062'), 0); // Invisible times + t.is(stringWidth('\u2063'), 0); // Invisible separator + t.is(stringWidth('\u2064'), 0); // Invisible plus + t.is(stringWidth('\uFEFF'), 0); // Zero-width no-break space + t.is(stringWidth('x\u2060x'), 2); + t.is(stringWidth('x\u2061x'), 2); + t.is(stringWidth('x\u2062x'), 2); + t.is(stringWidth('x\u2063x'), 2); + t.is(stringWidth('x\u2064x'), 2); +});