diff --git a/packages/fns/src/getUTF16Increment.ts b/packages/fns/src/getUTF16Increment.ts new file mode 100644 index 000000000..679783771 --- /dev/null +++ b/packages/fns/src/getUTF16Increment.ts @@ -0,0 +1,15 @@ +/** + * Get how many UTF-16 code units (1 or 2) a string index must advance by for a code point + * + * @param codePoint - The code point of the character + * @returns 1 or 2 + */ +const getUTF16Increment = (codePoint: number) => { + // The maximum code point for a single UTF-16 code unit is 0xFFFF. + // A code point greater than 0xFFFF is encoded as a surrogate pair + // (two code units), so we need to increment by 2 + const maxCodePoint = 0xffff; + return codePoint > maxCodePoint ? 2 : 1; +}; + +export default getUTF16Increment; diff --git a/packages/fns/src/index.ts b/packages/fns/src/index.ts index 1940ba071..f273b821c 100644 --- a/packages/fns/src/index.ts +++ b/packages/fns/src/index.ts @@ -17,3 +17,4 @@ export { default as reverse } from './reverse.js'; export { default as upperFirst } from './upperFirst.js'; export { default as without } from './without.js'; export { default as parseFloat } from './parseFloat.js'; +export { default as getUTF16Increment } from './getUTF16Increment.js'; diff --git a/packages/fns/tests/getUTF16Increment.ts b/packages/fns/tests/getUTF16Increment.ts new file mode 100644 index 000000000..2e0549c6d --- /dev/null +++ b/packages/fns/tests/getUTF16Increment.ts @@ -0,0 +1,16 @@ +import { describe, expect, test } from 'vitest'; + +import getUTF16Increment from '../src/getUTF16Increment'; + +describe('getUTF16Increment', () => { + test('returns 1 for BMP characters', () => { + expect(getUTF16Increment(0x0000)).toEqual(1); + expect(getUTF16Increment(0x007f)).toEqual(1); + expect(getUTF16Increment(0xffff)).toEqual(1); + }); + + test('returns 2 for surrogate pairs', () => { + expect(getUTF16Increment(0x10000)).toEqual(2); + expect(getUTF16Increment(0x10ffff)).toEqual(2); + }); +}); diff --git a/packages/pdfkit/src/font/afm.js b/packages/pdfkit/src/font/afm.js index 
27e8f522d..7272eb8b5 100644 --- a/packages/pdfkit/src/font/afm.js +++ b/packages/pdfkit/src/font/afm.js @@ -1,5 +1,6 @@ import fs from 'fs'; import range from '../utils/range.js'; +import { getUTF16Increment } from '@react-pdf/fns'; const WIN_ANSI_MAP = { 402: 131, @@ -206,14 +207,14 @@ class AFMFont { encodeText(text) { const res = []; - for ( - let i = 0, end = text.length, asc = 0 <= end; - asc ? i < end : i > end; - asc ? i++ : i-- - ) { - let char = text.charCodeAt(i); - char = WIN_ANSI_MAP[char] || char; + let i = 0; + + while (i < text.length) { + const codePoint = text.codePointAt(i); + const char = WIN_ANSI_MAP[codePoint] || codePoint; res.push(char.toString(16)); + + i += getUTF16Increment(codePoint); } return res; @@ -221,14 +222,13 @@ class AFMFont { glyphsForString(string) { const glyphs = []; + let i = 0; + + while (i < string.length) { + const codePoint = string.codePointAt(i); + glyphs.push(this.characterToGlyph(codePoint)); - for ( - let i = 0, end = string.length, asc = 0 <= end; - asc ? i < end : i > end; - asc ? 
i++ : i-- - ) { - const charCode = string.charCodeAt(i); - glyphs.push(this.characterToGlyph(charCode)); + i += getUTF16Increment(codePoint); } return glyphs; diff --git a/packages/textkit/src/engines/fontSubstitution/index.ts b/packages/textkit/src/engines/fontSubstitution/index.ts index 2116a4317..20b95c546 100644 --- a/packages/textkit/src/engines/fontSubstitution/index.ts +++ b/packages/textkit/src/engines/fontSubstitution/index.ts @@ -1,4 +1,4 @@ -import { last } from '@react-pdf/fns'; +import { last, getUTF16Increment } from '@react-pdf/fns'; import { AttributedString, Font, Run } from '../../types'; const IGNORED_CODE_POINTS = [173]; @@ -49,9 +49,9 @@ const fontSubstitution = const chars = string.slice(run.start, run.end); - for (let j = 0; j < chars.length; j += 1) { - const char = chars[j]; - const codePoint = char.codePointAt(0); + let j = 0; + while (j < chars.length) { + const codePoint = chars.codePointAt(j); // If the default font does not have a glyph and the fallback font does, we use it const font = pickFontFromFontStack( codePoint, @@ -83,7 +83,9 @@ const fontSubstitution = lastIndex = index; } - index += char.length; + const charLength = getUTF16Increment(codePoint); + j += charLength; + index += charLength; } } diff --git a/packages/textkit/tests/engines/fontSubstitution.test.ts b/packages/textkit/tests/engines/fontSubstitution.test.ts index 0a76efc8f..cb59818b5 100644 --- a/packages/textkit/tests/engines/fontSubstitution.test.ts +++ b/packages/textkit/tests/engines/fontSubstitution.test.ts @@ -116,4 +116,112 @@ describe('FontSubstitution', () => { expect(string.runs[1].attributes.font).toEqual([SimplifiedChineseFont]); }); }); + + describe('Surrogate Pairs', () => { + const EmojiFont = { + name: 'EmojiFont', + unitsPerEm: 1000, + hasGlyphForCodePoint: (codePoint) => + codePoint === 0x1f600 || // 😀 Grinning Face + codePoint === 0x1f60d, // 😍 Heart Eyes + }; + + const RegularFont = { + name: 'RegularFont', + unitsPerEm: 1000, + 
hasGlyphForCodePoint: (codePoint) => codePoint < 0xffff, + }; + + test('should handle surrogate pairs in text', () => { + const run = { + start: 0, + end: 3, // A + surrogate pair (2 JS chars) + attributes: { + font: [RegularFont, EmojiFont], + fontSize: 12, + }, + } as any; + + // A + 😀 (Grinning Face emoji, surrogate pair) + const string = instance({ string: 'A😀', runs: [run] }); + + expect(string).toHaveProperty('string', 'A😀'); + expect(string.runs).toHaveLength(2); + + // First run is the letter "A" with RegularFont + expect(string.runs[0]).toHaveProperty('start', 0); + expect(string.runs[0]).toHaveProperty('end', 1); + expect(string.runs[0].attributes.font).toEqual([RegularFont]); + + // Second run is the emoji "😀" with EmojiFont + expect(string.runs[1]).toHaveProperty('start', 1); + expect(string.runs[1]).toHaveProperty('end', 3); // Surrogate pair takes 2 positions + expect(string.runs[1].attributes.font).toEqual([EmojiFont]); + }); + + test('should handle multiple surrogate pairs in text', () => { + const run = { + start: 0, + end: 5, // A (1) + surrogate pair (2) + surrogate pair (2) = 5 + attributes: { + font: [RegularFont, EmojiFont], + fontSize: 12, + }, + } as any; + + // A + 😀 (Grinning Face) + 😍 (Heart Eyes) + const string = instance({ string: 'A😀😍', runs: [run] }); + + expect(string).toHaveProperty('string', 'A😀😍'); + expect(string.runs).toHaveLength(2); + + // First run is the letter "A" with RegularFont + expect(string.runs[0]).toHaveProperty('start', 0); + expect(string.runs[0]).toHaveProperty('end', 1); + expect(string.runs[0].attributes.font).toEqual([RegularFont]); + + // Second run is both emojis with EmojiFont + expect(string.runs[1]).toHaveProperty('start', 1); + expect(string.runs[1]).toHaveProperty('end', 5); // Two surrogate pairs + expect(string.runs[1].attributes.font).toEqual([EmojiFont]); + }); + + test('should handle surrogate pairs interspersed with regular text', () => { + const run = { + start: 0, + end: 7, // A + surrogate 
pair (2) + B + surrogate pair (2) + C = 7 + attributes: { + font: [RegularFont, EmojiFont], + fontSize: 12, + }, + } as any; + + // A + 😀 (Grinning Face) + B + 😍 (Heart Eyes) + C + const string = instance({ string: 'A😀B😍C', runs: [run] }); + + expect(string).toHaveProperty('string', 'A😀B😍C'); + expect(string.runs).toHaveLength(5); + + // Alternating fonts for regular chars and emojis + expect(string.runs[0]).toHaveProperty('start', 0); + expect(string.runs[0]).toHaveProperty('end', 1); + expect(string.runs[0].attributes.font).toEqual([RegularFont]); + + expect(string.runs[1]).toHaveProperty('start', 1); + expect(string.runs[1]).toHaveProperty('end', 3); + expect(string.runs[1].attributes.font).toEqual([EmojiFont]); + + expect(string.runs[2]).toHaveProperty('start', 3); + expect(string.runs[2]).toHaveProperty('end', 4); + expect(string.runs[2].attributes.font).toEqual([RegularFont]); + + expect(string.runs[3]).toHaveProperty('start', 4); + expect(string.runs[3]).toHaveProperty('end', 6); + expect(string.runs[3].attributes.font).toEqual([EmojiFont]); + + expect(string.runs[4]).toHaveProperty('start', 6); + expect(string.runs[4]).toHaveProperty('end', 7); + expect(string.runs[4].attributes.font).toEqual([RegularFont]); + }); + }); }); diff --git a/packages/textkit/tests/utils/stringFromCodePoints.test.ts b/packages/textkit/tests/utils/stringFromCodePoints.test.ts index 228636918..0a69ec683 100644 --- a/packages/textkit/tests/utils/stringFromCodePoints.test.ts +++ b/packages/textkit/tests/utils/stringFromCodePoints.test.ts @@ -29,4 +29,24 @@ describe('utils stringFromCodePoints operator', () => { test('should return get correct string for even mode code points', () => { expect(stringFromCodePoints([76, 64257, 77, 64259, 78])).toBe('LfiMffiN'); }); + + test('should handle surrogate pair code points correctly', () => { + expect(stringFromCodePoints([0x1f600])).toBe(String.fromCodePoint(0x1f600)); + }); + + test('should handle multiple surrogate pair code points', () => { 
+ expect(stringFromCodePoints([0x1f600, 0x1f60d])).toBe( + String.fromCodePoint(0x1f600) + String.fromCodePoint(0x1f60d), + ); + }); + + test('should handle mixed regular and surrogate pair code points', () => { + expect(stringFromCodePoints([65, 0x1f600, 66, 0x1f60d, 67])).toBe( + 'A' + + String.fromCodePoint(0x1f600) + + 'B' + + String.fromCodePoint(0x1f60d) + + 'C', + ); + }); });