Skip to content

Commit 84b5484

Browse files
committed
Move cluster break scanning logic into @marijn/find-cluster-break
1 parent 3f8ae53 commit 84b5484

File tree

3 files changed

+5
-97
lines changed

3 files changed

+5
-97
lines changed

package.json

+3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
"module": "dist/index.js",
2626
"sideEffects": false,
2727
"license": "MIT",
28+
"dependencies": {
29+
"@marijn/find-cluster-break": "^1.0.0"
30+
},
2831
"devDependencies": {
2932
"@codemirror/buildhelper": "^1.0.0"
3033
},

src/char.ts

+2-54
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,4 @@
1-
// Compressed representation of the Grapheme_Cluster_Break=Extend
2-
// information from
3-
// http://www.unicode.org/Public/16.0.0/ucd/auxiliary/GraphemeBreakProperty.txt.
4-
// Each pair of elements represents a range, as an offset from the
5-
// previous range and a length. Numbers are in base-36, with the empty
6-
// string being a shorthand for 1.
7-
let extend = "lc,34,7n,7,7b,19,,,,2,,2,,,20,b,1c,l,g,,2t,7,2,6,2,2,,4,z,,u,r,2j,b,1m,9,9,,o,4,,9,,3,,5,17,3,3b,f,,w,1j,,,,4,8,4,,3,7,a,2,t,,1m,,,,2,4,8,,9,,a,2,q,,2,2,1l,,4,2,4,2,2,3,3,,u,2,3,,b,2,1l,,4,5,,2,4,,k,2,m,6,,,1m,,,2,,4,8,,7,3,a,2,u,,1n,,,,c,,9,,14,,3,,1l,3,5,3,,4,7,2,b,2,t,,1m,,2,,2,,3,,5,2,7,2,b,2,s,2,1l,2,,,2,4,8,,9,,a,2,t,,20,,4,,2,3,,,8,,29,,2,7,c,8,2q,,2,9,b,6,22,2,r,,,,,,1j,e,,5,,2,5,b,,10,9,,2u,4,,6,,2,2,2,p,2,4,3,g,4,d,,2,2,6,,f,,jj,3,qa,3,t,3,t,2,u,2,1s,2,,7,8,,2,b,9,,19,3,3b,2,y,,3a,3,4,2,9,,6,3,63,2,2,,1m,,,7,,,,,2,8,6,a,2,,1c,h,1r,4,1c,7,,,5,,14,9,c,2,w,4,2,2,,3,1k,,,2,3,,,3,1m,8,2,2,48,3,,d,,7,4,,6,,3,2,5i,1m,,5,ek,,5f,x,2da,3,3x,,2o,w,fe,6,2x,2,n9w,4,,a,w,2,28,2,7k,,3,,4,,p,2,5,,47,2,q,i,d,,12,8,p,b,1a,3,1c,,2,4,2,2,13,,1v,6,2,2,2,2,c,,8,,1b,,1f,,,3,2,2,5,2,,,16,2,8,,6m,,2,,4,,fn4,,kh,g,g,g,a6,2,gt,,6a,,45,5,1ae,3,,2,5,4,14,3,4,,4l,2,fx,4,ar,2,49,b,4w,,1i,f,1k,3,1d,4,2,2,1x,3,10,5,,8,1q,,c,2,1g,9,a,4,2,,2n,3,2,,,2,6,,4g,,3,8,l,2,1l,2,,,,,m,,e,7,3,5,5f,8,2,3,,,n,,29,,2,6,,,2,,,2,,2,6j,,2,4,6,2,,2,r,2,2d,8,2,,,2,2y,,,,2,6,,,2t,3,2,4,,5,77,9,,2,6t,,a,2,,,4,,40,4,2,2,4,,w,a,14,6,2,4,8,,9,6,2,3,1a,d,,2,ba,7,,6,,,2a,m,2,7,,2,,2,3e,6,3,,,2,,7,,,20,2,3,,,,9n,2,f0b,5,1n,7,t4,,1r,4,29,,f5k,2,43q,,,3,4,5,8,8,2,7,u,4,44,3,1iz,1j,4,1e,8,,e,,m,5,,f,11s,7,,h,2,7,,2,,5,79,7,c5,4,15s,7,31,7,240,5,gx7k,2o,3k,6o".split(",").map(s => s ? parseInt(s, 36) : 1)
8-
// Convert offsets into absolute values
9-
for (let i = 1; i < extend.length; i++) extend[i] += extend[i - 1]
10-
11-
function isExtendingChar(code: number) {
12-
for (let i = 1; i < extend.length; i += 2)
13-
if (extend[i] > code) return extend[i - 1] <= code
14-
return false
15-
}
16-
17-
function isRegionalIndicator(code: number) {
18-
return code >= 0x1F1E6 && code <= 0x1F1FF
19-
}
20-
21-
const ZWJ = 0x200d
1+
import {findClusterBreak as find} from "@marijn/find-cluster-break"
222

233
/// Returns the next grapheme cluster break _after_ (not equal to)
244
/// `pos`, if `forward` is true, or before otherwise. Returns `pos`
@@ -27,39 +7,7 @@ const ZWJ = 0x200d
277
/// `includeExtending` is true), characters joined with zero-width
288
/// joiners, and flag emoji.
299
export function findClusterBreak(str: string, pos: number, forward = true, includeExtending = true) {
30-
return (forward ? nextClusterBreak : prevClusterBreak)(str, pos, includeExtending)
31-
}
32-
33-
function nextClusterBreak(str: string, pos: number, includeExtending: boolean) {
34-
if (pos == str.length) return pos
35-
// If pos is in the middle of a surrogate pair, move to its start
36-
if (pos && surrogateLow(str.charCodeAt(pos)) && surrogateHigh(str.charCodeAt(pos - 1))) pos--
37-
let prev = codePointAt(str, pos)
38-
pos += codePointSize(prev)
39-
while (pos < str.length) {
40-
let next = codePointAt(str, pos)
41-
if (prev == ZWJ || next == ZWJ || includeExtending && isExtendingChar(next)) {
42-
pos += codePointSize(next)
43-
prev = next
44-
} else if (isRegionalIndicator(next)) {
45-
let countBefore = 0, i = pos - 2
46-
while (i >= 0 && isRegionalIndicator(codePointAt(str, i))) { countBefore++; i -= 2 }
47-
if (countBefore % 2 == 0) break
48-
else pos += 2
49-
} else {
50-
break
51-
}
52-
}
53-
return pos
54-
}
55-
56-
function prevClusterBreak(str: string, pos: number, includeExtending: boolean) {
57-
while (pos > 0) {
58-
let found = nextClusterBreak(str, pos - 2, includeExtending)
59-
if (found < pos) return found
60-
pos--
61-
}
62-
return 0
10+
return find(str, pos, forward, includeExtending)
6311
}
6412

6513
function surrogateLow(ch: number) { return ch >= 0xDC00 && ch < 0xE000 }

test/test-cluster.ts

-43
This file was deleted.

0 commit comments

Comments
 (0)