@@ -297,3 +297,203 @@ export class BinaryHeap<T> {
297
297
return this . _items . length
298
298
}
299
299
}
300
+
301
/**
 * Unicode code points and UTF-16 code-unit boundaries consulted when a
 * string is split into user-perceived characters.
 */
export const enum EnumRunesCode {
  /** First UTF-16 high (leading) surrogate code unit. */
  HIGH_SURROGATE_START = 0xD800,
  /** Last UTF-16 high (leading) surrogate code unit. */
  HIGH_SURROGATE_END = 0xDBFF,

  /** First UTF-16 low (trailing) surrogate code unit. */
  LOW_SURROGATE_START = 0xDC00,

  /** U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A. */
  REGIONAL_INDICATOR_START = 0x1F1E6,
  /** U+1F1FF REGIONAL INDICATOR SYMBOL LETTER Z. */
  REGIONAL_INDICATOR_END = 0x1F1FF,

  /** U+1F3FB, first Fitzpatrick skin-tone modifier. */
  FITZPATRICK_MODIFIER_START = 0x1F3FB,
  /** U+1F3FF, last Fitzpatrick skin-tone modifier. */
  FITZPATRICK_MODIFIER_END = 0x1F3FF,

  /** U+FE00 VARIATION SELECTOR-1. */
  VARIATION_MODIFIER_START = 0xFE00,
  /** U+FE0F VARIATION SELECTOR-16. */
  VARIATION_MODIFIER_END = 0xFE0F,

  /** U+20D0, start of the "Combining Diacritical Marks for Symbols" block. */
  DIACRITICAL_MARKS_START = 0x20D0,
  /** U+20FF, end of the "Combining Diacritical Marks for Symbols" block. */
  DIACRITICAL_MARKS_END = 0x20FF,

  /** U+1F3F4 WAVING BLACK FLAG, the base of subdivision-flag tag sequences. */
  SUBDIVISION_INDICATOR_START = 0x1F3F4,
  /** U+E0000, start of the Tags block. */
  TAGS_START = 0xE0000,
  /** U+E007F CANCEL TAG, end of the Tags block. */
  TAGS_END = 0xE007F,

  /** U+200D ZERO WIDTH JOINER. */
  ZWJ = 0x200D,
}
325
+
326
/**
 * Frozen list of BMP code units that `isGrapheme` looks up; `runes` keeps a
 * code unit found here attached to the character that precedes it.
 */
export const GRAPHEMES: readonly number[] = Object.freeze([
  // ( ◌̈ ) COMBINING DIAERESIS
  0x0308,
  // ( ष ) DEVANAGARI LETTER SSA
  0x0937,
  // ( ि ) DEVANAGARI VOWEL SIGN I
  0x093f,
  // ( ந ) TAMIL LETTER NA
  0x0ba8,
  // ( ி ) TAMIL VOWEL SIGN I
  0x0bbf,
  // ( ◌் ) TAMIL SIGN VIRAMA
  0x0bcd,
  // ( ◌ั ) THAI CHARACTER MAI HAN-AKAT
  0x0e31,
  // ( ำ ) THAI CHARACTER SARA AM
  0x0e33,
  // ( เ ) THAI CHARACTER SARA E
  0x0e40,
  // ( ◌้ ) THAI CHARACTER MAI THO
  0x0e49,
  // ( ᄀ ) HANGUL CHOSEONG KIYEOK
  0x1100,
  // ( ᅡ ) HANGUL JUNGSEONG A
  0x1161,
  // ( ᆨ ) HANGUL JONGSEONG KIYEOK
  0x11a8,
])
341
+
342
/**
 * Fixed UTF-16 code-unit counts that `nextUnits` can report for one
 * character.
 */
export const enum EnumCodeUnits {
  /** A single code unit. */
  unit_1 = 1,
  /** Two code units (one surrogate pair). */
  unit_2 = 2,
  /** Four code units (two surrogate pairs). */
  unit_4 = 4,
}
347
+
348
/**
 * Splits a string into an array of user-perceived characters, keeping
 * surrogate pairs, flags, skin-tone modifiers, trailing combining marks and
 * zero-width-joiner sequences together.
 *
 * @param string - The text to split.
 * @returns One array entry per detected character, in source order.
 * @throws TypeError if `string` is not of type `string`.
 */
export function runes(string: string): string[] {
  if (typeof string !== 'string') {
    throw new TypeError('string cannot be undefined or null')
  }

  const pieces: string[] = []
  // `offset` is where the current character starts; `span` is how many code
  // units have been claimed for it so far.
  let offset = 0
  let span = 0

  while (offset < string.length) {
    span += nextUnits(offset + span, string)

    // Each trailing category is absorbed at most once, in this fixed order.
    if (isGrapheme(string[offset + span])) {
      span += 1
    }
    if (isVariationSelector(string[offset + span])) {
      span += 1
    }
    if (isDiacriticalMark(string[offset + span])) {
      span += 1
    }

    if (isZeroWidthJoiner(string[offset + span])) {
      // A joiner glues the following character onto this one: claim the
      // joiner and loop again without emitting anything yet.
      span += 1
    } else {
      pieces.push(string.slice(offset, offset + span))
      offset += span
      span = 0
    }
  }

  return pieces
}
382
+
383
/**
 * Decides how many UTF-16 code units, starting at index `i`, make up the
 * current character.
 *
 * - BMP character: 1 code unit
 * - Non-BMP character (surrogate pair): 2 code units
 * - Character followed by a skin-tone modifier: 4 code units (2 code points)
 * - Country flag: 4 code units (2 code points)
 * - Subdivision flag: the base flag, its tag characters and the closing
 *   CANCEL TAG, e.g. 14 code units (7 code points)
 *
 * Trailing variation selectors, combining marks and zero-width joiners are
 * not counted here; `runes` handles them after calling this function.
 *
 * @param i - Index of the first code unit of the character.
 * @param string - The text being scanned.
 * @returns The number of code units belonging to the character at `i`.
 */
export function nextUnits(i: number, string: string): number {
  const current = string[i]
  // If the value is not the start of a surrogate pair, or it is the very last
  // code unit, only take the value at i.
  if (!isFirstOfSurrogatePair(current) || i === string.length - 1) {
    return EnumCodeUnits.unit_1
  }

  const currentPair = current + string[i + 1]
  const nextPair = string.substring(i + 2, i + 4)

  // Country flags are comprised of two regional indicator symbols, each
  // represented by a surrogate pair. See http://emojipedia.org/flags/
  if (isRegionalIndicator(currentPair) && isRegionalIndicator(nextPair)) {
    return EnumCodeUnits.unit_4
  }

  // Subdivision flags are emoji tag sequences: U+1F3F4 followed by characters
  // from the Tags block and terminated by U+E007F (CANCEL TAG).
  // See https://emojipedia.org/emoji-tag-sequence/
  if (isSubdivisionFlag(currentPair) && isSupplementarySpecialpurposePlane(nextPair)) {
    // Walk only the tag characters that directly follow the flag, so that a
    // CANCEL TAG further along the string cannot swallow unrelated text.
    let end = i + 2
    while (isSupplementarySpecialpurposePlane(string.substring(end, end + 2))) {
      const isCancelTag = string.codePointAt(end) === EnumRunesCode.TAGS_END
      end += 2
      if (isCancelTag) {
        return end - i
      }
    }
    // No CANCEL TAG closed the sequence: fall through and treat the flag as
    // an ordinary surrogate pair instead of returning a length that would
    // split it.
  }

  // If the next pair is a Fitzpatrick skin-tone modifier, take 4.
  // See http://emojipedia.org/modifiers/
  // Technically only some code points are meant to be combined with the
  // modifiers; the current pair is not checked to see if it is one of them.
  if (isFitzpatrickModifier(nextPair)) {
    return EnumCodeUnits.unit_4
  }

  return EnumCodeUnits.unit_2
}
433
+
434
/**
 * Reports whether the first code unit of `string` is a UTF-16 high
 * (leading) surrogate.
 *
 * Always returns a real boolean: empty or non-string input yields `false`,
 * matching the `typeof` guard used by the other predicates in this file.
 *
 * @param string - The text whose first code unit is inspected.
 * @returns `true` if that code unit lies in 0xD800–0xDBFF.
 */
export function isFirstOfSurrogatePair(string: string): boolean {
  return typeof string === 'string'
    && betweenInclusive(
      // charCodeAt(0) is NaN for '', which fails the range test.
      string.charCodeAt(0),
      EnumRunesCode.HIGH_SURROGATE_START,
      EnumRunesCode.HIGH_SURROGATE_END,
    )
}
437
+
438
/**
 * Reports whether the surrogate pair at the start of `string` decodes to a
 * regional indicator symbol (U+1F1E6–U+1F1FF).
 *
 * @param string - Text beginning with the surrogate pair to test.
 * @returns `true` if the decoded code point is inside that range.
 */
export function isRegionalIndicator(string: string): boolean {
  const codePoint = codePointFromSurrogatePair(string)
  return codePoint >= EnumRunesCode.REGIONAL_INDICATOR_START
    && codePoint <= EnumRunesCode.REGIONAL_INDICATOR_END
}
441
+
442
/**
 * Reports whether the surrogate pair at the start of `string` decodes to
 * U+1F3F4, the base character of subdivision-flag tag sequences.
 *
 * @param string - Text beginning with the surrogate pair to test.
 * @returns `true` if the decoded code point is exactly U+1F3F4.
 */
export function isSubdivisionFlag(string: string): boolean {
  // The accepted range has identical bounds, so this is an equality test.
  return codePointFromSurrogatePair(string) === EnumRunesCode.SUBDIVISION_INDICATOR_START
}
445
+
446
/**
 * Reports whether the surrogate pair at the start of `string` decodes to a
 * Fitzpatrick skin-tone modifier (U+1F3FB–U+1F3FF).
 *
 * @param string - Text beginning with the surrogate pair to test.
 * @returns `true` if the decoded code point is inside that range.
 */
export function isFitzpatrickModifier(string: string): boolean {
  const codePoint = codePointFromSurrogatePair(string)
  return codePoint >= EnumRunesCode.FITZPATRICK_MODIFIER_START
    && codePoint <= EnumRunesCode.FITZPATRICK_MODIFIER_END
}
449
+
450
/**
 * Reports whether the first code unit of `string` is a variation selector
 * (U+FE00–U+FE0F).
 *
 * @param string - The text whose first code unit is inspected.
 * @returns `false` for non-string input, otherwise the result of the range test.
 */
export function isVariationSelector(string: string): boolean {
  if (typeof string !== 'string') {
    return false
  }
  const unit = string.charCodeAt(0)
  return betweenInclusive(unit, EnumRunesCode.VARIATION_MODIFIER_START, EnumRunesCode.VARIATION_MODIFIER_END)
}
453
+
454
/**
 * Reports whether the first code unit of `string` lies in U+20D0–U+20FF
 * (the "Combining Diacritical Marks for Symbols" block).
 *
 * @param string - The text whose first code unit is inspected.
 * @returns `false` for non-string input, otherwise the result of the range test.
 */
export function isDiacriticalMark(string: string): boolean {
  if (typeof string !== 'string') {
    return false
  }
  const unit = string.charCodeAt(0)
  return betweenInclusive(unit, EnumRunesCode.DIACRITICAL_MARKS_START, EnumRunesCode.DIACRITICAL_MARKS_END)
}
457
+
458
/**
 * Reports whether the first code point of `string` lies in the Tags block
 * (U+E0000–U+E007F).
 *
 * The type guard runs before the string is dereferenced, so non-string
 * input yields `false` instead of throwing.
 *
 * @param string - The text whose first code point is inspected.
 * @returns `true` if that code point is a tag character; `false` for empty
 * or non-string input.
 */
export function isSupplementarySpecialpurposePlane(string: string): boolean {
  if (typeof string !== 'string') {
    return false
  }
  // codePointAt(0) is undefined for the empty string.
  const codePoint = string.codePointAt(0)
  return codePoint !== undefined
    && betweenInclusive(codePoint, EnumRunesCode.TAGS_START, EnumRunesCode.TAGS_END)
}
462
+
463
/**
 * Reports whether the first code unit of `string` is listed in `GRAPHEMES`.
 *
 * @param string - The text whose first code unit is inspected.
 * @returns `false` for non-string input, otherwise the lookup result.
 */
export function isGrapheme(string: string): boolean {
  if (typeof string !== 'string') {
    return false
  }
  const unit = string.charCodeAt(0)
  return GRAPHEMES.includes(unit)
}
466
+
467
/**
 * Reports whether the first code unit of `string` is U+200D ZERO WIDTH JOINER.
 *
 * @param string - The text whose first code unit is inspected.
 * @returns `false` for non-string input, otherwise the comparison result.
 */
export function isZeroWidthJoiner(string: string): boolean {
  if (typeof string !== 'string') {
    return false
  }
  return EnumRunesCode.ZWJ === string.charCodeAt(0)
}
470
+
471
/**
 * Combines the first two UTF-16 code units of `pair` into one code point
 * using the surrogate-pair formula.
 *
 * The input is not validated: the arithmetic is applied to whatever the
 * first two code units are, and the result is NaN when fewer than two exist.
 *
 * @param pair - Text whose first two code units are read as high and low surrogate.
 * @returns The computed code point.
 */
export function codePointFromSurrogatePair(pair: string): number {
  const lead = pair.charCodeAt(0)
  const trail = pair.charCodeAt(1)
  // The lead offset supplies the upper bits (shifted by 10) and the trail
  // offset the lower bits; 0x10000 is added because the encoding starts
  // above the BMP.
  const upperBits = (lead - EnumRunesCode.HIGH_SURROGATE_START) << 10
  const lowerBits = trail - EnumRunesCode.LOW_SURROGATE_START
  return upperBits + lowerBits + 0x10000
}
476
+
477
/**
 * Reports whether `value` lies in the closed interval [`lower`, `upper`].
 *
 * @param value - The number to test.
 * @param lower - Inclusive lower bound.
 * @param upper - Inclusive upper bound.
 * @returns `true` if `lower <= value <= upper`; `false` otherwise, including
 * when `value` is NaN.
 */
export function betweenInclusive(value: number, lower: number, upper: number): boolean {
  // Written as two positive comparisons so that NaN fails both of them.
  const atOrAboveLower = lower <= value
  const atOrBelowUpper = value <= upper
  return atOrAboveLower && atOrBelowUpper
}
480
+
481
/**
 * Extracts part of a string, counting in user-perceived characters (as
 * produced by `runes`) rather than UTF-16 code units.
 *
 * @param string - The text to take characters from.
 * @param start - Index of the first character to include; when omitted the
 * whole input is returned unchanged.
 * @param width - Number of characters to include; when omitted everything
 * from `start` to the end is taken.
 * @returns The selected characters joined together, or `''` when `start` is
 * at or beyond the character count.
 * @throws TypeError if `string` is not of type `string` (raised by `runes`).
 */
export function substring(string: string, start?: number, width?: number): string {
  // Split first so that non-string input is rejected even when `start` is omitted.
  const chars = runes(string)
  if (start === undefined) {
    return string
  }
  if (start >= chars.length) {
    return ''
  }

  const available = chars.length - start
  const count = width === undefined ? available : width
  const requestedEnd = start + count
  // An end past the last character means "to the end"; pass undefined to
  // slice directly rather than storing it in a number-typed variable.
  const endIndex = requestedEnd > start + available ? undefined : requestedEnd
  return chars.slice(start, endIndex).join('')
}
0 commit comments