From 7f6d12b428f942a1225af9d22c603adf9e4d619d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Thu, 31 May 2018 00:27:38 +0200 Subject: [PATCH 1/5] grapheme data import --- bin/create-graphemedata.js | 124 +++++++++++++++++++++++++++++++++++++ package.json | 3 +- src/Grapheme.test.ts | 103 ++++++++++++++++++++++++++++++ src/Grapheme.ts | 88 ++++++++++++++++++++++++++ src/GraphemeData.ts | 6 ++ 5 files changed, 323 insertions(+), 1 deletion(-) create mode 100755 bin/create-graphemedata.js create mode 100644 src/Grapheme.test.ts create mode 100644 src/Grapheme.ts create mode 100644 src/GraphemeData.ts diff --git a/bin/create-graphemedata.js b/bin/create-graphemedata.js new file mode 100755 index 0000000000..772d7476a0 --- /dev/null +++ b/bin/create-graphemedata.js @@ -0,0 +1,124 @@ +#!/usr/bin/env node +'use strict'; + +const URL = 'https://www.unicode.org/Public/10.0.0/ucd/auxiliary/GraphemeBreakProperty.txt'; +const PATH = __dirname + '/../src/GraphemeData.ts'; + +const GRAPHEME_REX = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm; + +const TYPES = { + Other: 0, + L: 1, + V: 2, + T: 3, + LV: 4, + LVT: 5, + CR: 6, + LF: 7, + ZWJ: 8, + Prepend: 9, + Control: 10, + Extend: 11, + SpacingMark: 12, + E_Base: 13, + Glue_After_Zwj: 14, + E_Modifier: 15, + E_Base_GAZ: 16, + Regional_Indicator: 17 +}; + +function parseDefinitions(data) { + let codepoints = Object.create(null); + let match = null; + while (match = GRAPHEME_REX.exec(data)) { + let start = parseInt(match[1], 16); + let end = parseInt(match[2], 16) || start; + for (let i = start; i < end + 1; ++i) + codepoints[i] = match[3]; + } + return codepoints; +} + + +function createPackedBMP(codepoints, start, end) { + let type = -1; + let count = 0; + let lengths = []; + let types = []; + for (let i = start; i < end; ++i) { + let t = parseInt(TYPES[codepoints[i] || 'Other']); + if (t !== type) { + lengths.push(count); + types.push(type); + type = t; + count = 0; + } + count++; + if (count === 255) { + lengths.push(count); + types.push(type); + count = 0; + } + } + lengths.push(count); + types.push(type); + + // remove start entries + lengths.shift(); + types.shift(); + + if (types.length & 1) + types.push(0); + + let accu = 0; + let finalTypes = []; + for (let i = 0; i < types.length; ++i) { + accu <<= 4; + accu |= types[i]; + if (i & 1) { + finalTypes.push(accu); + accu = 0; + } + } + + // null terminate length values + lengths.push(0); + return new Buffer(lengths.concat(finalTypes)).toString('base64'); +} + + +function createGraphemeDataFile(url, path) { + require('https').get(url, (resp) => { + let data = ''; + resp.on('data', (chunk) => { + data += chunk; + }); + resp.on('end', () => { + const codepoints = parseDefinitions(data); + let highest = 0; + for (let el in codepoints) + highest = Math.max(highest, parseInt(el)); + + // codepoint < 12443 + const first = createPackedBMP(codepoints, 0, 12443); + // 42606 <= codepoint < 65536 + const second = createPackedBMP(codepoints, 42606, 65536); + // codepoint <= 65536 + const third = ''; //createPackedHIGH(codepoints, 65536, highest); + + // write to ts file + let final = ''; + final += `// FIRST: 0 <= codepoint < 12443\n`; + final += `export const FIRST: string = '${first}';\n`; + final += `// SECOND: 42606 <= codepoint < 65536\n`; + final += `export const SECOND: string = '${second}';\n`; + final += `// THIRD: codepoint >= 65536\n`; + final += `export const THIRD: string = '${third}';\n`; + require('fs').writeFileSync(path, final); + }); + }).on('error', (err) => { + console.log('error', err.message); + }); +} + +createGraphemeDataFile(URL, PATH); diff --git a/package.json b/package.json index 44cc2e88d2..3a62dc5029 100644 --- a/package.json +++ b/package.json @@ -55,6 +55,7 @@ "prepublish": "npm run build", "coveralls": "gulp coveralls", "webpack": "gulp webpack", - "watch": "gulp watch" + "watch": "gulp watch", + "graphemedata": "node bin/create-graphemedata" } } diff --git a/src/Grapheme.test.ts b/src/Grapheme.test.ts new file mode 100644 index 0000000000..c3d38b2974 --- /dev/null +++ b/src/Grapheme.test.ts @@ -0,0 +1,103 @@ +/** + * Copyright (c) 2018 The xterm.js authors. All rights reserved. + * @license MIT + */ + +import { FIRST, SECOND } from './GraphemeData'; +import { loadFromPackedBMP, graphemeType } from './Grapheme'; +import * as chai from 'chai'; + +const TYPES = { + Other: 0, + L: 1, + V: 2, + T: 3, + LV: 4, + LVT: 5, + CR: 6, + LF: 7, + ZWJ: 8, + Prepend: 9, + Control: 10, + Extend: 11, + SpacingMark: 12, + E_Base: 13, + Glue_After_Zwj: 14, + E_Modifier: 15, + E_Base_GAZ: 16, + Regional_Indicator: 17 +}; + +const URL = 'https://www.unicode.org/Public/10.0.0/ucd/auxiliary/GraphemeBreakProperty.txt'; +const GRAPHEME_REX = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm; + +let CODEPOINTS = null; + +function parseDefinitions(data: string): {[key: number]: number} { + let codepoints = Object.create(null); + let match = null; + while (match = GRAPHEME_REX.exec(data)) { + let start = parseInt(match[1], 16); + let end = parseInt(match[2], 16) || start; + for (let i = start; i < end + 1; ++i) codepoints[i] = match[3]; + } + return codepoints; +} + +function loadUnicodeData(done: Function): void { + require('https').get(URL, (resp): any => { + let data = ''; + resp.on('data', (chunk): void => { + data += chunk; + }); + resp.on('end', () => { + CODEPOINTS = parseDefinitions(data); + done(); + }); + }).on('error', (err) => { + throw Error('error fetching unicode data'); + }); +} + +describe('grapheme cluster', function (): void { + before(function(done: Function): void { + loadUnicodeData(done); + }); + describe('correct GraphemeData', function(): void { + it('FIRST', function(): void { + if (!CODEPOINTS) return; + let one = loadFromPackedBMP(FIRST, 0, 12443); + for (let cp = 0; cp < 12443; ++cp) { + let fromStore = TYPES[CODEPOINTS[cp]] || 0; + let v = (cp & 1) ? one[cp >> 1] >> 4 : one[cp >> 1] & 15; + chai.expect(fromStore).equals(v); + } + }); + it('SECOND', function(): void { + if (!CODEPOINTS) return; + let one = loadFromPackedBMP(SECOND, 42606, 65536); + for (let cp = 42606; cp < 65536; ++cp) { + let fromStore = TYPES[CODEPOINTS[cp]] || 0; + let idx = cp - 42606; + let v = (idx & 1) ? one[idx >> 1] >> 4 : one[idx >> 1] & 15; + chai.expect(fromStore).equals(v); + } + }); + it('THIRD', function(): void { + if (!CODEPOINTS) return; + // TODO + }); + }); + describe('graphemeType', function(): void { + it('BMP', function(): void { + if (!CODEPOINTS) return; + for (let cp = 0; cp < 65536; ++cp) { + chai.expect(graphemeType(cp)).equals(TYPES[CODEPOINTS[cp]] || 0); + } + }); + it('HIGH', function(): void { + if (!CODEPOINTS) return; + // TODO + }); + }); +}); diff --git a/src/Grapheme.ts b/src/Grapheme.ts new file mode 100644 index 0000000000..4eddc9ff85 --- /dev/null +++ b/src/Grapheme.ts @@ -0,0 +1,88 @@ +/** + * Copyright (c) 2018 The xterm.js authors. All rights reserved. + * @license MIT + */ +import { FIRST, SECOND } from './GraphemeData'; + +export function loadFromPackedBMP(data: string, start: number, end: number): number[] | Uint8Array { + // decode base64 and split into lengths and types strings + const raw = (typeof atob === 'undefined') + // nodejs + ? new Buffer(data, 'base64').toString('binary') + // browser - FIXME: how to test this? + : atob(data); + // first occurence of 0x0 marks end of lengths (null terminated) + const lengths = raw.substring(0, raw.indexOf('\x00')); + const types = raw.substring(raw.indexOf('\x00') + 1); + + // lookup table with 2 type entries per index position + const table = (typeof Uint8Array === 'undefined') + ? new Array(((end - start) >> 1) + 1) + : new Uint8Array(((end - start) >> 1) + 1); + + // load data into lookup table + let codepointOffset = 0; + for (let chunkIdx = 0; chunkIdx < lengths.length; ++chunkIdx) { + let currentLength = lengths.charCodeAt(chunkIdx); + for (let chunkPos = 0; chunkPos < currentLength; ++chunkPos) { + let tcode = types.charCodeAt(chunkIdx >> 1); + let type = (chunkIdx & 1) ? tcode & 15 : tcode >> 4; + table[(codepointOffset + chunkPos) >> 1] |= ((codepointOffset + chunkPos) & 1) ? type << 4 : type; + } + codepointOffset += currentLength; + } + return table; +} + + +// NOTE: Types must be identical to bin/create-graphemedata.js#TYPES +const enum Types { + OTHER = 0, + L = 1, + V = 2, + T = 3, + LV = 4, + LVT = 5, + CR = 6, + LF = 7, + ZWJ = 8, + PREPEND = 9, + CONTROL = 10, + EXTEND = 11, + SPACINGMARK = 12, + E_BASE = 13, + GLUE_AFTER_ZWJ = 14, + E_MODIFIER = 15, + E_BASE_GAZ = 16, + REGIONAL_INDICATOR = 17 +} + +export const graphemeType = (function(): (codepoint: number) => Types { + let BMP_LOW = null; + let BMP_HIGH = null; + return (codepoint: number): Types => { + // ASCII printable shortcut + if (31 < codepoint && codepoint < 127) return Types.OTHER; + // BMP_LOW: 0 <= codepoint < 12443 + if (codepoint < 12443) { + let table = BMP_LOW || ((): number[] | Uint8Array => { + BMP_LOW = loadFromPackedBMP(FIRST, 0, 12443); + return BMP_LOW; + })(); + return (codepoint & 1) ? table[codepoint >> 1] >> 4 : table[codepoint >> 1] & 15; + } + // always Other: 12443 <= codepoint < 42606 + if (codepoint < 42606) return Types.OTHER; + // BMP_HIGH (CJK): 42606 <= codepoint < 65536 + if (codepoint < 65536) { + let table = BMP_HIGH || ((): number[] | Uint8Array => { + BMP_HIGH = loadFromPackedBMP(SECOND, 42606, 65536); + return BMP_HIGH; + })(); + codepoint -= 42606; + return (codepoint & 1) ? table[codepoint >> 1] >> 4 : table[codepoint >> 1] & 15; + } + // TODO codepoint > 65536 + return Types.OTHER; + }; +})(); diff --git a/src/GraphemeData.ts b/src/GraphemeData.ts new file mode 100644 index 0000000000..c9cb30d3ef --- /dev/null +++ b/src/GraphemeData.ts @@ -0,0 +1,6 @@ +// FIRST: 0 <= codepoint < 12443 +export const FIRST: string = 'CgECARJfIQ0B//9UcP8UB/8ILQEBAQIBAgEBOAYKCwEBLhUQAWUHAQEGAgIBBCEBAQEeG1sLOgkiBAEJAQMBBSsDeA4BIAE2AQEBAQMIBAECAQcKAh0BAjgBAQECBAICAgIBCQEKAh0CATgBAQMCBAICAwMBHgIDAQsCATgBAQMFAQIBAQIBFAIWBgEBAjgBAQIBBAICAgIBCAIKAh4BOwEBAQIDAwEDAQkBKAEDOgMEAQMBBAcCCwIdAQI4AQEBAQIBAgEBAgECAgcCCwIcAgI3AgEBAgQBAwEDAQEIAQoCHgJGAQQBAgMBAQEHARICPQEBAQcMCGIBAQEGAQILBkoCGwEBAQEBBAIxDgEFAQIFCwEkCQFmBAEGAQICAhcCAgQDEAQNAQEBAgYBDwFiYEhY/14D////tQMdAx0CHgJAAgEHCAECCwkBLQMBdgIiAXYDBAIDBAIBBgPbAgIBOQEBAQcBAQEBAggGCgIBMA9BBAEvAQEFAQEFAQImCQwCAR4BBAICAQM4AQECAwEBAwIwCAgCApgDAQ0BBwQBBAIBAgECxjoBBf//DQEBAQIYBzEQYCH//////zEBIgEBAVICYgEOAQEEVgH//////48DjQFgIP//LAZpAgCnpqCgoACwCwCwsLCwsJCwoLCwuQsLCwkLCwsLCwsLCwsLm8C8sMvLwLCwvAsLywwMsLCwvAsMsLCwsLCwvAsMsLwMsLCwvAsLywwMsLCwsLy8DAywsLwLwLCwsLC8Cwy8vAvAywsLC8CwvLDAy5CwsMCwvLCwywwLDLCwsMsLCwsLCwsMC8sLCwsLC8sLywywsLCwywsLASMAsAALCwsLC8vLywsLoLCwvLwMvLC8sMvLCwsLywsLC8C8vLy8CwvAy8vLC8vLy8DLywsLywsMsMsLCwAKuKCgoLAAAA0ODg4NDg0OAAAAsLCwALCw'; +// SECOND: 42606 <= codepoint < 65536 +export const SECOND: string = 'AQQBCiACUAL/EQEDAQQBFwICAVgCMhACGhI0CBkLAgwdAwMBLwECBAIBBCQBQwYCAgICDAEIAQEuATMBAQMCAgUCAQEpAQICBQEB7AIBAgECAQEBEgEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwwXBDEE//////////8I////////////////////////////////////OQH//+MQEBDPAZ4CUAwEAAsLCwsAsLCwy8DAywsLC8AQvAvLy8CwvLywsLwLCwsLCwsMvAywy8vAywRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFAgMKqqqqqgAAAAAAAAAAAAAAAAAAsACwsKCwoA'; +// THIRD: codepoint >= 65536 +export const THIRD: string = ''; From bd24a4f13260c1769b5e90bd67e554a3893b5167 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Thu, 31 May 2018 00:40:39 +0200 Subject: [PATCH 2/5] fix const declarations --- src/Grapheme.test.ts | 21 +++++++++++---------- src/Grapheme.ts | 10 +++++----- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/Grapheme.test.ts b/src/Grapheme.test.ts index c3d38b2974..09e151fadf 100644 --- a/src/Grapheme.test.ts +++ b/src/Grapheme.test.ts @@ -34,11 +34,11 @@ const GRAPHEME_REX = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm; let CODEPOINTS = null; function parseDefinitions(data: string): {[key: number]: number} { - let codepoints = Object.create(null); + const codepoints = Object.create(null); let match = null; while (match = GRAPHEME_REX.exec(data)) { - let start = parseInt(match[1], 16); - let end = parseInt(match[2], 16) || start; + const start = parseInt(match[1], 16); + const end = parseInt(match[2], 16) || start; for (let i = start; i < end + 1; ++i) codepoints[i] = match[3]; } return codepoints; @@ -61,25 +61,26 @@ function loadUnicodeData(done: Function): void { describe('grapheme cluster', function (): void { before(function(done: Function): void { + this.timeout(5000); loadUnicodeData(done); }); describe('correct GraphemeData', function(): void { it('FIRST', function(): void { if (!CODEPOINTS) return; - let one = loadFromPackedBMP(FIRST, 0, 12443); + const one = loadFromPackedBMP(FIRST, 0, 12443); for (let cp = 0; cp < 12443; ++cp) { - let fromStore = TYPES[CODEPOINTS[cp]] || 0; - let v = (cp & 1) ? one[cp >> 1] >> 4 : one[cp >> 1] & 15; + const fromStore = TYPES[CODEPOINTS[cp]] || 0; + const v = (cp & 1) ? one[cp >> 1] >> 4 : one[cp >> 1] & 15; chai.expect(fromStore).equals(v); } }); it('SECOND', function(): void { if (!CODEPOINTS) return; - let one = loadFromPackedBMP(SECOND, 42606, 65536); + const one = loadFromPackedBMP(SECOND, 42606, 65536); for (let cp = 42606; cp < 65536; ++cp) { - let fromStore = TYPES[CODEPOINTS[cp]] || 0; - let idx = cp - 42606; - let v = (idx & 1) ? one[idx >> 1] >> 4 : one[idx >> 1] & 15; + const fromStore = TYPES[CODEPOINTS[cp]] || 0; + const idx = cp - 42606; + const v = (idx & 1) ? one[idx >> 1] >> 4 : one[idx >> 1] & 15; chai.expect(fromStore).equals(v); } }); diff --git a/src/Grapheme.ts b/src/Grapheme.ts index 4eddc9ff85..c7ad07adbc 100644 --- a/src/Grapheme.ts +++ b/src/Grapheme.ts @@ -23,10 +23,10 @@ export function loadFromPackedBMP(data: string, start: number, end: number): num // load data into lookup table let codepointOffset = 0; for (let chunkIdx = 0; chunkIdx < lengths.length; ++chunkIdx) { - let currentLength = lengths.charCodeAt(chunkIdx); + const currentLength = lengths.charCodeAt(chunkIdx); for (let chunkPos = 0; chunkPos < currentLength; ++chunkPos) { - let tcode = types.charCodeAt(chunkIdx >> 1); - let type = (chunkIdx & 1) ? tcode & 15 : tcode >> 4; + const tcode = types.charCodeAt(chunkIdx >> 1); + const type = (chunkIdx & 1) ? tcode & 15 : tcode >> 4; table[(codepointOffset + chunkPos) >> 1] |= ((codepointOffset + chunkPos) & 1) ? type << 4 : type; } codepointOffset += currentLength; @@ -65,7 +65,7 @@ export const graphemeType = (function(): (codepoint: number) => Types { if (31 < codepoint && codepoint < 127) return Types.OTHER; // BMP_LOW: 0 <= codepoint < 12443 if (codepoint < 12443) { - let table = BMP_LOW || ((): number[] | Uint8Array => { + const table = BMP_LOW || ((): number[] | Uint8Array => { BMP_LOW = loadFromPackedBMP(FIRST, 0, 12443); return BMP_LOW; })(); @@ -75,7 +75,7 @@ export const graphemeType = (function(): (codepoint: number) => Types { if (codepoint < 42606) return Types.OTHER; // BMP_HIGH (CJK): 42606 <= codepoint < 65536 if (codepoint < 65536) { - let table = BMP_HIGH || ((): number[] | Uint8Array => { + const table = BMP_HIGH || ((): number[] | Uint8Array => { BMP_HIGH = loadFromPackedBMP(SECOND, 42606, 65536); return BMP_HIGH; })(); From a64e839609ae4a95bf823dcd703a23cc9781c634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sun, 3 Jun 2018 02:13:03 +0200 Subject: [PATCH 3/5] import SMP and SSP data --- bin/create-graphemedata.js | 53 ++++++++++++++++++++++++-- src/Grapheme.test.ts | 38 +++++-------------- src/Grapheme.ts | 77 ++++++++++++++++++++++++++------------ src/GraphemeData.ts | 6 ++- 4 files changed, 117 insertions(+), 57 deletions(-) diff --git a/bin/create-graphemedata.js b/bin/create-graphemedata.js index 772d7476a0..3cdb5f9e9b 100755 --- a/bin/create-graphemedata.js +++ b/bin/create-graphemedata.js @@ -53,12 +53,12 @@ function createPackedBMP(codepoints, start, end) { type = t; count = 0; } - count++; if (count === 255) { lengths.push(count); types.push(type); count = 0; } + count++; } lengths.push(count); types.push(type); @@ -86,6 +86,47 @@ function createPackedBMP(codepoints, start, end) { return new Buffer(lengths.concat(finalTypes)).toString('base64'); } +function createPackedHIGH(codepoints, plane, start, end) { + start = start + 65536 * plane; + end = end + 65536 * plane; + let length = 0; + let type = -1; + const segments = []; + let segmentStart = -1; + for (let i = start; i < end; ++i) { + let t = parseInt(TYPES[codepoints[i] || 'Other']); + if (t !== type) { + // end of segment reached + // only push non Other segments + if (type) segments.push([segmentStart, length, type]); + segmentStart = i; + length = 0; + type = t; + } + if (length === 255) { + if (type) { + segments.push([segmentStart, length, type]); + segmentStart = i; + length = 0; + } + } + length++; + } + if (type) segments.push([segmentStart, length, type]); + segments.shift(); + console.log(segments); + + // write to byte typed + let final = []; + for (let i = 0; i < segments.length; ++i) { + final.push(segments[i][0] >> 8); + final.push(segments[i][0] & 255); + final.push(segments[i][1]); + final.push(segments[i][2]); + } + return new Buffer(final).toString('base64'); +} + function createGraphemeDataFile(url, path) { require('https').get(url, (resp) => { @@ -103,8 +144,10 @@ function createGraphemeDataFile(url, path) { const first = createPackedBMP(codepoints, 0, 12443); // 42606 <= codepoint < 65536 const second = createPackedBMP(codepoints, 42606, 65536); - // codepoint <= 65536 - const third = ''; //createPackedHIGH(codepoints, 65536, highest); + // Supplementary Multilingual Plane (1): 0 <= codepoint < 63966 + const third = createPackedHIGH(codepoints, 1, 0, 63966); + // Supplement­ary Special-purpose Plane (14): 0 <= codepoint < highest + 1 + const fourth = createPackedHIGH(codepoints, 14, 0, highest + 1); // write to ts file let final = ''; @@ -112,8 +155,10 @@ function createGraphemeDataFile(url, path) { final += `export const FIRST: string = '${first}';\n`; final += `// SECOND: 42606 <= codepoint < 65536\n`; final += `export const SECOND: string = '${second}';\n`; - final += `// THIRD: codepoint >= 65536\n`; + final += `// THIRD: Supplementary Multilingual Plane (1) 0 <= codepoint < 63966\n`; final += `export const THIRD: string = '${third}';\n`; + final += `// FOURTH: Supplement­ary Special-purpose Plane (14) 0 <= codepoint <= highest\n`; + final += `export const FOURTH: string = '${fourth}';\n`; require('fs').writeFileSync(path, final); }); }).on('error', (err) => { diff --git a/src/Grapheme.test.ts b/src/Grapheme.test.ts index 09e151fadf..e89b2a69db 100644 --- a/src/Grapheme.test.ts +++ b/src/Grapheme.test.ts @@ -3,8 +3,7 @@ * @license MIT */ -import { FIRST, SECOND } from './GraphemeData'; -import { loadFromPackedBMP, graphemeType } from './Grapheme'; +import { graphemeType } from './Grapheme'; import * as chai from 'chai'; const TYPES = { @@ -64,41 +63,24 @@ describe('grapheme cluster', function (): void { this.timeout(5000); loadUnicodeData(done); }); - describe('correct GraphemeData', function(): void { - it('FIRST', function(): void { + describe('graphemeType', function(): void { + it('BMP (0)', function(): void { if (!CODEPOINTS) return; - const one = loadFromPackedBMP(FIRST, 0, 12443); - for (let cp = 0; cp < 12443; ++cp) { - const fromStore = TYPES[CODEPOINTS[cp]] || 0; - const v = (cp & 1) ? one[cp >> 1] >> 4 : one[cp >> 1] & 15; - chai.expect(fromStore).equals(v); + for (let cp = 0; cp < 65536; ++cp) { + chai.expect(graphemeType(cp)).equals(TYPES[CODEPOINTS[cp]] || 0); } }); - it('SECOND', function(): void { + it('SMP (1)', function(): void { if (!CODEPOINTS) return; - const one = loadFromPackedBMP(SECOND, 42606, 65536); - for (let cp = 42606; cp < 65536; ++cp) { - const fromStore = TYPES[CODEPOINTS[cp]] || 0; - const idx = cp - 42606; - const v = (idx & 1) ? one[idx >> 1] >> 4 : one[idx >> 1] & 15; - chai.expect(fromStore).equals(v); + for (let cp = 65536; cp < 2 * 65536; ++cp) { + chai.expect(graphemeType(cp)).equals(TYPES[CODEPOINTS[cp]] || 0); } }); - it('THIRD', function(): void { + it('SSP (14)', function(): void { if (!CODEPOINTS) return; - // TODO - }); - }); - describe('graphemeType', function(): void { - it('BMP', function(): void { - if (!CODEPOINTS) return; - for (let cp = 0; cp < 65536; ++cp) { + for (let cp = 14 * 65536; cp < 15 * 65536; ++cp) { chai.expect(graphemeType(cp)).equals(TYPES[CODEPOINTS[cp]] || 0); } }); - it('HIGH', function(): void { - if (!CODEPOINTS) return; - // TODO - }); }); }); diff --git a/src/Grapheme.ts b/src/Grapheme.ts index c7ad07adbc..e83e2252a6 100644 --- a/src/Grapheme.ts +++ b/src/Grapheme.ts @@ -2,9 +2,30 @@ * Copyright (c) 2018 The xterm.js authors. All rights reserved. * @license MIT */ -import { FIRST, SECOND } from './GraphemeData'; +import { FIRST, SECOND, THIRD, FOURTH } from './GraphemeData'; -export function loadFromPackedBMP(data: string, start: number, end: number): number[] | Uint8Array { +const enum Types { + OTHER = 0, + L = 1, + V = 2, + T = 3, + LV = 4, + LVT = 5, + CR = 6, + LF = 7, + ZWJ = 8, + PREPEND = 9, + CONTROL = 10, + EXTEND = 11, + SPACINGMARK = 12, + E_BASE = 13, + GLUE_AFTER_ZWJ = 14, + E_MODIFIER = 15, + E_BASE_GAZ = 16, + REGIONAL_INDICATOR = 17 +} + +function loadFromPackedBMP(data: string, start: number, end: number): number[] | Uint8Array { // decode base64 and split into lengths and types strings const raw = (typeof atob === 'undefined') // nodejs @@ -34,35 +55,28 @@ export function loadFromPackedBMP(data: string, start: number, end: number): num return table; } +function loadFromPackedHIGH(lookupObj: any, data: string, plane: number): void { + const raw = (typeof atob === 'undefined') + ? new Buffer(data, 'base64').toString('binary') + : atob(data); -// NOTE: Types must be identical to bin/create-graphemedata.js#TYPES -const enum Types { - OTHER = 0, - L = 1, - V = 2, - T = 3, - LV = 4, - LVT = 5, - CR = 6, - LF = 7, - ZWJ = 8, - PREPEND = 9, - CONTROL = 10, - EXTEND = 11, - SPACINGMARK = 12, - E_BASE = 13, - GLUE_AFTER_ZWJ = 14, - E_MODIFIER = 15, - E_BASE_GAZ = 16, - REGIONAL_INDICATOR = 17 + // data bytes: [codepoint_high, codepoint_low, length, type] + for (let i = 0; i < raw.length; i += 4) { + let codepoint = (raw.charCodeAt(i) << 8) + raw.charCodeAt(i + 1) + 65536 * plane; + let end = raw.charCodeAt(i + 2) + codepoint; + let type = raw.charCodeAt(i + 3); + for (let cp = codepoint; cp < end; ++cp) lookupObj[cp] = type; + } } export const graphemeType = (function(): (codepoint: number) => Types { let BMP_LOW = null; let BMP_HIGH = null; + let HIGH = null; return (codepoint: number): Types => { // ASCII printable shortcut if (31 < codepoint && codepoint < 127) return Types.OTHER; + // BMP_LOW: 0 <= codepoint < 12443 if (codepoint < 12443) { const table = BMP_LOW || ((): number[] | Uint8Array => { @@ -71,8 +85,10 @@ export const graphemeType = (function(): (codepoint: number) => Types { })(); return (codepoint & 1) ? table[codepoint >> 1] >> 4 : table[codepoint >> 1] & 15; } + // always Other: 12443 <= codepoint < 42606 if (codepoint < 42606) return Types.OTHER; + // BMP_HIGH (CJK): 42606 <= codepoint < 65536 if (codepoint < 65536) { const table = BMP_HIGH || ((): number[] | Uint8Array => { @@ -82,7 +98,22 @@ export const graphemeType = (function(): (codepoint: number) => Types { codepoint -= 42606; return (codepoint & 1) ? table[codepoint >> 1] >> 4 : table[codepoint >> 1] & 15; } - // TODO codepoint > 65536 + + // codepoint > 65536 + // 129502 highest in SMP (Plane 1) + // 917504 lowest in SSP (Plane 14) + // 921599 highest in SSP + if (codepoint < 129503 || (917504 <= codepoint && codepoint < 921600)) { + const lookupObj = HIGH || ((): any => { + HIGH = Object.create(null); + loadFromPackedHIGH(HIGH, THIRD, 1); + loadFromPackedHIGH(HIGH, FOURTH, 14); + return HIGH; + })(); + return lookupObj[codepoint] || Types.OTHER; + } + + // all other codepoints default to Other return Types.OTHER; }; })(); diff --git a/src/GraphemeData.ts b/src/GraphemeData.ts index c9cb30d3ef..252631a9ad 100644 --- a/src/GraphemeData.ts +++ b/src/GraphemeData.ts @@ -2,5 +2,7 @@ export const FIRST: string = 'CgECARJfIQ0B//9UcP8UB/8ILQEBAQIBAgEBOAYKCwEBLhUQAWUHAQEGAgIBBCEBAQEeG1sLOgkiBAEJAQMBBSsDeA4BIAE2AQEBAQMIBAECAQcKAh0BAjgBAQECBAICAgIBCQEKAh0CATgBAQMCBAICAwMBHgIDAQsCATgBAQMFAQIBAQIBFAIWBgEBAjgBAQIBBAICAgIBCAIKAh4BOwEBAQIDAwEDAQkBKAEDOgMEAQMBBAcCCwIdAQI4AQEBAQIBAgEBAgECAgcCCwIcAgI3AgEBAgQBAwEDAQEIAQoCHgJGAQQBAgMBAQEHARICPQEBAQcMCGIBAQEGAQILBkoCGwEBAQEBBAIxDgEFAQIFCwEkCQFmBAEGAQICAhcCAgQDEAQNAQEBAgYBDwFiYEhY/14D////tQMdAx0CHgJAAgEHCAECCwkBLQMBdgIiAXYDBAIDBAIBBgPbAgIBOQEBAQcBAQEBAggGCgIBMA9BBAEvAQEFAQEFAQImCQwCAR4BBAICAQM4AQECAwEBAwIwCAgCApgDAQ0BBwQBBAIBAgECxjoBBf//DQEBAQIYBzEQYCH//////zEBIgEBAVICYgEOAQEEVgH//////48DjQFgIP//LAZpAgCnpqCgoACwCwCwsLCwsJCwoLCwuQsLCwkLCwsLCwsLCwsLm8C8sMvLwLCwvAsLywwMsLCwvAsMsLCwsLCwvAsMsLwMsLCwvAsLywwMsLCwsLy8DAywsLwLwLCwsLC8Cwy8vAvAywsLC8CwvLDAy5CwsMCwvLCwywwLDLCwsMsLCwsLCwsMC8sLCwsLC8sLywywsLCwywsLASMAsAALCwsLC8vLywsLoLCwvLwMvLC8sMvLCwsLywsLC8C8vLy8CwvAy8vLC8vLy8DLywsLywsMsMsLCwAKuKCgoLAAAA0ODg4NDg0OAAAAsLCwALCw'; // SECOND: 42606 <= codepoint < 65536 export const SECOND: string = 'AQQBCiACUAL/EQEDAQQBFwICAVgCMhACGhI0CBkLAgwdAwMBLwECBAIBBCQBQwYCAgICDAEIAQEuATMBAQMCAgUCAQEpAQICBQEB7AIBAgECAQEBEgEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwwXBDEE//////////8I////////////////////////////////////OQH//+MQEBDPAZ4CUAwEAAsLCwsAsLCwy8DAywsLC8AQvAvLy8CwvLywsLwLCwsLCwsMvAywy8vAywRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFAgMKqqqqqgAAAAAAAAAAAAAAAAAAsACwsKCwoA'; -// THIRD: codepoint >= 65536 -export const THIRD: string = ''; +// THIRD: Supplementary Multilingual Plane (1) 0 <= codepoint < 63966 +export const THIRD: string = 'Af0BCwLgAQsDdgULCgEDCwoFAgsKDAQLCjgDCwo/AQsK5QILEAABDBABAQsQAgEMEDgPCxB/AwsQggEMELADDBCzBAsQtwIMELkCCxC9AQkRAAMLEScFCxEsAQwRLQgLEXMBCxGAAgsRggEMEbMDDBG2CQsRvwIMEcICCRHKAwsSLAMMEi8DCxIyAgwSNAELEjUBDBI2AgsSPgELEt8BCxLgAwwS4wgLEwACCxMCAgwTPAELEz4BCxM/AQwTQAELE0EEDBNHAgwTSwMME1cBCxNiAgwTZgcLE3AFCxQ1AwwUOAgLFEACDBRCAwsURQEMFEYBCxSwAQsUsQIMFLMGCxS5AQwUugELFLsCDBS9AQsUvgEMFL8CCxTBAQwUwgILFa8BCxWwAgwVsgQLFbgEDBW8AgsVvgEMFb8CCxXcAgsWMAMMFjMICxY7AgwWPQELFj4BDBY/AgsWqwELFqwBDBatAQsWrgIMFrAGCxa2AQwWtwELFx0DCxcgAgwXIgQLFyYBDBcnBQsaAQYLGgcCDBoJAgsaMwYLGjkBDBo6AQkaOwQLGkcBCxpRBgsaVwIMGlkDCxqGBAkaig0LGpcBDBqYAgscLwEMHDAHCxw4BgscPgEMHD8BCxySFgscqQEMHKoHCxyxAQwcsgILHLQBDBy1AgsdMQYLHToBCx08AgsdPwcLHUYBCR1HAQtq8AULazAHC29RLgxvjwQLvJ0CC7ygBArRZQEL0WYBDNFnAwvRbQEM0W4FC9FzCArRewgL0YUHC9GqBAvSQgML2gA3C9o7MgvadQEL2oQBC9qbBQvaoQ8L4AAHC+AIEQvgGwcL4CMCC+AmBQvo0AcL6UQHC/HmGhHzCAEO8z4BDvNzAQ7zhQEN85MBDvOkAQ7zqAEO88IDDfPHAQ3zygMN8+sBDvPtAQ7z+wUP9EICDfRGCw30ZgQQ9G4BDfRwCQ30fAEN9IEDDfSFAw30iwEO9KoBDfS7Ag71JwEO9SwBDvV0Ag31egEN9ZABDfWVAg316AEO9kUDDfZLBQ32gAEO9pIBDvajAQ32tAMN9sABDfbMAQ35GAUN+R4CDfkmAQ35MAoN+T0CDfnRDQ0='; +// FOURTH: Supplement­ary Special-purpose Plane (14) 0 <= codepoint <= highest +export const FOURTH: string = 'AAAgCgAgYAsAgIAKAQDwCwHw/woC7/8KA+7/CgTt/woF7P8KBuv/Cgfq/woI6f8KCej/Cgrn/woL5v8KDOX/Cg3k/woO4/8KD+IeCg=='; From 204421548bfe6ff0473dbaefe9e047558d56fbfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sun, 3 Jun 2018 03:55:29 +0200 Subject: [PATCH 4/5] break rules --- src/Grapheme.test.ts | 70 +++++++++++++++++++++++++++--- src/Grapheme.ts | 100 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 163 insertions(+), 7 deletions(-) diff --git a/src/Grapheme.test.ts b/src/Grapheme.test.ts index e89b2a69db..7c2381ecce 100644 --- a/src/Grapheme.test.ts +++ b/src/Grapheme.test.ts @@ -3,10 +3,10 @@ * @license MIT */ -import { graphemeType } from './Grapheme'; +import { graphemeType, canBreak, BreakState, Types } from './Grapheme'; import * as chai from 'chai'; -const TYPES = { +const _TYPES = { Other: 0, L: 1, V: 2, @@ -67,20 +67,80 @@ describe('grapheme cluster', function (): void { it('BMP (0)', function(): void { if (!CODEPOINTS) return; for (let cp = 0; cp < 65536; ++cp) { - chai.expect(graphemeType(cp)).equals(TYPES[CODEPOINTS[cp]] || 0); + chai.expect(graphemeType(cp)).equals(_TYPES[CODEPOINTS[cp]] || 0); } }); it('SMP (1)', function(): void { if (!CODEPOINTS) return; for (let cp = 65536; cp < 2 * 65536; ++cp) { - chai.expect(graphemeType(cp)).equals(TYPES[CODEPOINTS[cp]] || 0); + chai.expect(graphemeType(cp)).equals(_TYPES[CODEPOINTS[cp]] || 0); } }); it('SSP (14)', function(): void { if (!CODEPOINTS) return; for (let cp = 14 * 65536; cp < 15 * 65536; ++cp) { - chai.expect(graphemeType(cp)).equals(TYPES[CODEPOINTS[cp]] || 0); + chai.expect(graphemeType(cp)).equals(_TYPES[CODEPOINTS[cp]] || 0); } }); }); + describe('break rules', function(): void { + it('GB 3', function(): void { + chai.expect(canBreak(Types.LF, Types.CR)).equals(BreakState.FALSE); + }); + it('GB 4', function(): void { // TODO: test all states + const types = [Types.CONTROL, Types.CR, Types.LF]; + for (let pos in types) { + chai.expect(canBreak(Types.OTHER, types[pos])).equals(BreakState.TRUE); + } + }); + it('GB 5', function(): void { // TODO: test all states + const types = [Types.CONTROL, Types.CR, Types.LF]; + for (let pos in types) { + chai.expect(canBreak(types[pos], Types.OTHER)).equals(BreakState.TRUE); + } + }); + it('GB 6', function(): void { + const types = [Types.L, Types.V, Types.LV, Types.LVT]; + for (let pos in types) { + chai.expect(canBreak(types[pos], Types.L)).equals(BreakState.FALSE); + } + }); + it('GB 7', function(): void { + chai.expect(canBreak(Types.V, Types.LV)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.T, Types.LV)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.V, Types.V)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.T, Types.V)).equals(BreakState.FALSE); + }); + it('GB 8', function(): void { + chai.expect(canBreak(Types.T, Types.LVT)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.T, Types.T)).equals(BreakState.FALSE); + }); + it('GB 9', function(): void { + chai.expect(canBreak(Types.EXTEND, Types.OTHER)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.ZWJ, Types.OTHER)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.EXTEND, Types.E_BASE)).equals(BreakState.EMOJI_EXTEND); + chai.expect(canBreak(Types.ZWJ, Types.E_BASE)).equals(BreakState.EMOJI_EXTEND); // wrong here? + chai.expect(canBreak(Types.EXTEND, Types.E_BASE_GAZ)).equals(BreakState.EMOJI_EXTEND); + chai.expect(canBreak(Types.ZWJ, Types.E_BASE_GAZ)).equals(BreakState.EMOJI_EXTEND); // wrong here? + }); + it('GB 9a', function(): void { + chai.expect(canBreak(Types.SPACINGMARK, Types.OTHER)).equals(BreakState.FALSE); + }); + it('GB 9b', function(): void { + chai.expect(canBreak(Types.OTHER, Types.PREPEND)).equals(BreakState.FALSE); + }); + it('GB 10', function(): void { + chai.expect(canBreak(Types.E_MODIFIER, Types.E_BASE)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.E_MODIFIER, Types.E_BASE_GAZ)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.E_MODIFIER, BreakState.EMOJI_EXTEND)).equals(BreakState.FALSE); + }); + it('GB 11', function(): void { + chai.expect(canBreak(Types.GLUE_AFTER_ZWJ, Types.ZWJ)).equals(BreakState.FALSE); + chai.expect(canBreak(Types.E_BASE_GAZ, Types.ZWJ)).equals(BreakState.FALSE); + }); + it('GB 12 & 13', function(): void { + chai.expect(canBreak(Types.REGIONAL_INDICATOR, Types.REGIONAL_INDICATOR)).equals(BreakState.REGIONAL_SECOND); + chai.expect(canBreak(Types.REGIONAL_INDICATOR, BreakState.REGIONAL_SECOND)).equals(BreakState.TRUE); + }); + }); }); diff --git a/src/Grapheme.ts b/src/Grapheme.ts index e83e2252a6..999f046958 100644 --- a/src/Grapheme.ts +++ b/src/Grapheme.ts @@ -4,7 +4,7 @@ */ import { FIRST, SECOND, THIRD, FOURTH } from './GraphemeData'; -const enum Types { +export const enum Types { OTHER = 0, L = 1, V = 2, @@ -22,7 +22,8 @@ const enum Types { GLUE_AFTER_ZWJ = 14, E_MODIFIER = 15, E_BASE_GAZ = 16, - REGIONAL_INDICATOR = 17 + REGIONAL_INDICATOR = 17, + ILLEGAL = 31 } function loadFromPackedBMP(data: string, start: number, end: number): number[] | Uint8Array { @@ -117,3 +118,98 @@ export const graphemeType = (function(): (codepoint: number) => Types { return Types.OTHER; }; })(); + +export const enum BreakState { + FALSE = 32, + TRUE = 33, + EMOJI_EXTEND = 34, // does not break + REGIONAL_SECOND = 35 // does not break +} + +export function canBreak(current: Types | BreakState, previous: Types | BreakState): BreakState { + if (previous === Types.OTHER && current === Types.OTHER) { + return BreakState.TRUE; + } + // GB 1 sot ÷ Any + // if (previous === -1) --> handled at caller level + // return true; + // GB 2 Any ÷ eot + // if (current === -1) --> handled at caller level + // return true; + + // GB 3 CR × LF + if (previous === Types.CR && current === Types.LF) { + return BreakState.FALSE; + } + + // GB 4 (Control | CR | LF) ÷ + if (previous === Types.CONTROL || previous === Types.CR || previous === Types.LF) { + return BreakState.TRUE; + } + + // GB 5 ÷ (Control | CR | LF) + if (current === Types.CONTROL || current === Types.CR || current === Types.LF) { + return BreakState.TRUE; + } + + // GB 6 L × (L | V | LV | LVT) + if (previous === Types.L && (current === Types.L || current === Types.V || current === Types.LV || current === Types.LVT)) { + return BreakState.FALSE; + } + + // GB 7 (LV | V) × (V | T) + if ((previous === Types.LV || previous === Types.V) && (current === Types.V || current === Types.T)) { + return BreakState.FALSE; + } + + // GB 8 (LVT | T) × T + if ((previous === Types.LVT || previous === Types.T) && current === Types.T) { + return BreakState.FALSE; + } + + // GB 9 × (Extend | ZWJ) + if (current === Types.EXTEND || current === Types.ZWJ) { + if (previous === Types.E_BASE || previous === Types.E_BASE_GAZ) { + return BreakState.EMOJI_EXTEND; + } + return BreakState.FALSE; + } + + // GB 9a × SpacingMark + if (current === Types.SPACINGMARK) { + return BreakState.FALSE; + } + + // GB 9b Prepend × + if (previous === Types.PREPEND) { + return BreakState.FALSE; + } + + // GB 10 (E_Base | EBG) Extend* × E_Modifier + if ((previous === Types.E_BASE || previous === Types.E_BASE_GAZ) && current === Types.E_MODIFIER) { + return BreakState.FALSE; + } + + if (previous === BreakState.EMOJI_EXTEND && current === Types.E_MODIFIER) { + return BreakState.FALSE; + } + + // GB 11 ZWJ × (Glue_After_Zwj | EBG) + if (previous === Types.ZWJ && (current === Types.GLUE_AFTER_ZWJ || current === Types.E_BASE_GAZ)) { + return BreakState.FALSE; + } + + // GB 12 sot (RI RI)* RI × RI + // GB 13 [^RI] (RI RI)* RI × RI + if (previous === Types.REGIONAL_INDICATOR && current === Types.REGIONAL_INDICATOR) { + // return BreakState.False; + return BreakState.REGIONAL_SECOND; + } + + if (previous === BreakState.REGIONAL_SECOND && current === Types.REGIONAL_INDICATOR) { + return BreakState.TRUE; + } + + // GB 999 + return BreakState.TRUE; +} From 4c5539e9198bb09b55c168e69f81348e041fa984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Mon, 4 Jun 2018 15:50:51 +0200 Subject: [PATCH 5/5] first working version with GraphemeClusterIterator --- src/Grapheme.test.ts | 58 ++++++++++---------- src/Grapheme.ts | 100 +++++++++++++++++++++++++--------- src/InputHandler.ts | 124 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 54 deletions(-) diff --git a/src/Grapheme.test.ts b/src/Grapheme.test.ts index 7c2381ecce..13b0c1c68e 100644 --- a/src/Grapheme.test.ts +++ b/src/Grapheme.test.ts @@ -3,7 +3,7 @@ * @license MIT */ -import { graphemeType, canBreak, BreakState, Types } from './Grapheme'; +import { graphemeType, canBreak, BreakState, GraphemeTypes } from './Grapheme'; import * as chai from 'chai'; const _TYPES = { @@ -85,62 +85,62 @@ describe('grapheme cluster', function (): void { }); describe('break rules', function(): void { it('GB 3', function(): void { - chai.expect(canBreak(Types.LF, Types.CR)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.LF, GraphemeTypes.CR)).equals(BreakState.FALSE); }); it('GB 4', function(): void { // TODO: test all states - const types = [Types.CONTROL, Types.CR, Types.LF]; + const types = [GraphemeTypes.CONTROL, GraphemeTypes.CR, GraphemeTypes.LF]; for (let pos in types) { - chai.expect(canBreak(Types.OTHER, types[pos])).equals(BreakState.TRUE); + chai.expect(canBreak(GraphemeTypes.OTHER, types[pos])).equals(BreakState.TRUE); } }); it('GB 5', function(): void { // TODO: test all states - const types = [Types.CONTROL, Types.CR, Types.LF]; + const types = [GraphemeTypes.CONTROL, GraphemeTypes.CR, GraphemeTypes.LF]; for (let pos in types) { - chai.expect(canBreak(types[pos], Types.OTHER)).equals(BreakState.TRUE); + chai.expect(canBreak(types[pos], GraphemeTypes.OTHER)).equals(BreakState.TRUE); } }); it('GB 6', function(): void { - const types = [Types.L, Types.V, Types.LV, Types.LVT]; + const types = [GraphemeTypes.L, GraphemeTypes.V, GraphemeTypes.LV, GraphemeTypes.LVT]; for (let pos in types) { - chai.expect(canBreak(types[pos], Types.L)).equals(BreakState.FALSE); + chai.expect(canBreak(types[pos], GraphemeTypes.L)).equals(BreakState.FALSE); } }); it('GB 7', function(): void { - chai.expect(canBreak(Types.V, Types.LV)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.T, Types.LV)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.V, Types.V)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.T, Types.V)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.V, GraphemeTypes.LV)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.T, GraphemeTypes.LV)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.V, GraphemeTypes.V)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.T, GraphemeTypes.V)).equals(BreakState.FALSE); }); it('GB 8', function(): void { - chai.expect(canBreak(Types.T, Types.LVT)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.T, Types.T)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.T, GraphemeTypes.LVT)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.T, GraphemeTypes.T)).equals(BreakState.FALSE); }); it('GB 9', function(): void { - chai.expect(canBreak(Types.EXTEND, Types.OTHER)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.ZWJ, Types.OTHER)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.EXTEND, Types.E_BASE)).equals(BreakState.EMOJI_EXTEND); - chai.expect(canBreak(Types.ZWJ, Types.E_BASE)).equals(BreakState.EMOJI_EXTEND); // wrong here? - chai.expect(canBreak(Types.EXTEND, Types.E_BASE_GAZ)).equals(BreakState.EMOJI_EXTEND); - chai.expect(canBreak(Types.ZWJ, Types.E_BASE_GAZ)).equals(BreakState.EMOJI_EXTEND); // wrong here? + chai.expect(canBreak(GraphemeTypes.EXTEND, GraphemeTypes.OTHER)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.ZWJ, GraphemeTypes.OTHER)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.EXTEND, GraphemeTypes.E_BASE)).equals(BreakState.EMOJI_EXTEND); + chai.expect(canBreak(GraphemeTypes.ZWJ, GraphemeTypes.E_BASE)).equals(BreakState.EMOJI_EXTEND); // wrong here? + chai.expect(canBreak(GraphemeTypes.EXTEND, GraphemeTypes.E_BASE_GAZ)).equals(BreakState.EMOJI_EXTEND); + chai.expect(canBreak(GraphemeTypes.ZWJ, GraphemeTypes.E_BASE_GAZ)).equals(BreakState.EMOJI_EXTEND); // wrong here? }); it('GB 9a', function(): void { - chai.expect(canBreak(Types.SPACINGMARK, Types.OTHER)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.SPACINGMARK, GraphemeTypes.OTHER)).equals(BreakState.FALSE); }); it('GB 9b', function(): void { - chai.expect(canBreak(Types.OTHER, Types.PREPEND)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.OTHER, GraphemeTypes.PREPEND)).equals(BreakState.FALSE); }); it('GB 10', function(): void { - chai.expect(canBreak(Types.E_MODIFIER, Types.E_BASE)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.E_MODIFIER, Types.E_BASE_GAZ)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.E_MODIFIER, BreakState.EMOJI_EXTEND)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.E_MODIFIER, GraphemeTypes.E_BASE)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.E_MODIFIER, GraphemeTypes.E_BASE_GAZ)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.E_MODIFIER, BreakState.EMOJI_EXTEND)).equals(BreakState.FALSE); }); it('GB 11', function(): void { - chai.expect(canBreak(Types.GLUE_AFTER_ZWJ, Types.ZWJ)).equals(BreakState.FALSE); - chai.expect(canBreak(Types.E_BASE_GAZ, Types.ZWJ)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.GLUE_AFTER_ZWJ, GraphemeTypes.ZWJ)).equals(BreakState.FALSE); + chai.expect(canBreak(GraphemeTypes.E_BASE_GAZ, GraphemeTypes.ZWJ)).equals(BreakState.FALSE); }); it('GB 12 & 13', function(): void { - chai.expect(canBreak(Types.REGIONAL_INDICATOR, Types.REGIONAL_INDICATOR)).equals(BreakState.REGIONAL_SECOND); - chai.expect(canBreak(Types.REGIONAL_INDICATOR, BreakState.REGIONAL_SECOND)).equals(BreakState.TRUE); + chai.expect(canBreak(GraphemeTypes.REGIONAL_INDICATOR, GraphemeTypes.REGIONAL_INDICATOR)).equals(BreakState.REGIONAL_SECOND); + chai.expect(canBreak(GraphemeTypes.REGIONAL_INDICATOR, BreakState.REGIONAL_SECOND)).equals(BreakState.TRUE); }); }); }); diff --git a/src/Grapheme.ts b/src/Grapheme.ts index 999f046958..9e6cf26034 100644 --- a/src/Grapheme.ts +++ b/src/Grapheme.ts @@ -3,8 +3,9 @@ * @license MIT */ import { FIRST, SECOND, THIRD, FOURTH } from './GraphemeData'; +import { wcwidth } from './CharWidth'; -export const enum Types { +export const enum GraphemeTypes { OTHER = 0, L = 1, V = 2, @@ -70,13 +71,13 @@ function loadFromPackedHIGH(lookupObj: any, data: string, plane: number): void { } } -export const graphemeType = (function(): (codepoint: number) => Types { +export const graphemeType = (function(): (codepoint: number) => GraphemeTypes { let BMP_LOW = null; let BMP_HIGH = null; let HIGH = null; - return (codepoint: number): Types => { + return (codepoint: number): GraphemeTypes => { // ASCII printable shortcut - if (31 < codepoint && codepoint < 127) return Types.OTHER; + if (31 < codepoint && codepoint < 127) return GraphemeTypes.OTHER; // BMP_LOW: 0 <= codepoint < 12443 if (codepoint < 12443) { @@ -88,7 +89,7 @@ export const graphemeType = (function(): (codepoint: number) => Types { } // always Other: 12443 <= codepoint < 42606 - if (codepoint < 42606) return Types.OTHER; + if (codepoint < 42606) return GraphemeTypes.OTHER; // BMP_HIGH (CJK): 42606 <= codepoint < 65536 if (codepoint < 65536) { @@ -111,11 +112,11 @@ export const graphemeType = (function(): (codepoint: number) => Types { loadFromPackedHIGH(HIGH, FOURTH, 14); return HIGH; })(); - return lookupObj[codepoint] || Types.OTHER; + return lookupObj[codepoint] || GraphemeTypes.OTHER; } // all other codepoints default to Other - return Types.OTHER; + return GraphemeTypes.OTHER; }; })(); @@ -123,11 +124,12 @@ export const enum BreakState { FALSE = 32, TRUE = 33, EMOJI_EXTEND = 34, // does not break - REGIONAL_SECOND = 35 // does not break + REGIONAL_SECOND = 35, // does not break + SURROGATE = 36 // does not break } -export function canBreak(current: Types | BreakState, previous: Types | BreakState): BreakState { - if (previous === Types.OTHER && current === Types.OTHER) { +export function canBreak(current: GraphemeTypes | BreakState, previous: GraphemeTypes | BreakState): BreakState { + if (previous === GraphemeTypes.OTHER && current === GraphemeTypes.OTHER) { return BreakState.TRUE; } // GB 1 sot ÷ Any @@ -138,78 +140,126 @@ export function canBreak(current: Types | BreakState, previous: Types | BreakSta // return true; // GB 3 CR × LF - if (previous === Types.CR && current === Types.LF) { + if (previous === GraphemeTypes.CR && current === GraphemeTypes.LF) { return BreakState.FALSE; } // GB 4 (Control | CR | LF) ÷ - if (previous === Types.CONTROL || previous === Types.CR || previous === Types.LF) { + if (previous === GraphemeTypes.CONTROL || previous === GraphemeTypes.CR || previous === GraphemeTypes.LF) { return BreakState.TRUE; } // GB 5 ÷ (Control | CR | LF) - if (current === Types.CONTROL || current === Types.CR || current === Types.LF) { + if (current === GraphemeTypes.CONTROL || current === GraphemeTypes.CR || current === GraphemeTypes.LF) { return BreakState.TRUE; } // GB 6 L × (L | V | LV | LVT) - if (previous === Types.L && (current === Types.L || current === Types.V || current === Types.LV || current === Types.LVT)) { + if (previous === GraphemeTypes.L && (current === GraphemeTypes.L || current === GraphemeTypes.V || current === GraphemeTypes.LV || current === GraphemeTypes.LVT)) { return BreakState.FALSE; } // GB 7 (LV | V) × (V | T) - if ((previous === Types.LV || previous === Types.V) && (current === Types.V || current === Types.T)) { + if ((previous === GraphemeTypes.LV || previous === GraphemeTypes.V) && (current === GraphemeTypes.V || current === GraphemeTypes.T)) { return BreakState.FALSE; } // GB 8 (LVT | T) × T - if ((previous === Types.LVT || previous === Types.T) && current === Types.T) { + if ((previous === GraphemeTypes.LVT || previous === GraphemeTypes.T) && current === GraphemeTypes.T) { return BreakState.FALSE; } // GB 9 × (Extend | ZWJ) - if (current === Types.EXTEND || current === Types.ZWJ) { - if (previous === Types.E_BASE || previous === Types.E_BASE_GAZ) { + if (current === GraphemeTypes.EXTEND || current === GraphemeTypes.ZWJ) { + if (previous === GraphemeTypes.E_BASE || previous === GraphemeTypes.E_BASE_GAZ) { return BreakState.EMOJI_EXTEND; } return BreakState.FALSE; } // GB 9a × SpacingMark - if (current === Types.SPACINGMARK) { + if (current === GraphemeTypes.SPACINGMARK) { return BreakState.FALSE; } // GB 9b Prepend × - if (previous === Types.PREPEND) { + if (previous === GraphemeTypes.PREPEND) { return BreakState.FALSE; } // GB 10 (E_Base | EBG) Extend* × E_Modifier - if ((previous === Types.E_BASE || previous === Types.E_BASE_GAZ) && current === Types.E_MODIFIER) { + if ((previous === GraphemeTypes.E_BASE || previous === GraphemeTypes.E_BASE_GAZ) && current === GraphemeTypes.E_MODIFIER) { return BreakState.FALSE; } - if (previous === BreakState.EMOJI_EXTEND && current === Types.E_MODIFIER) { + if (previous === BreakState.EMOJI_EXTEND && current === GraphemeTypes.E_MODIFIER) { return BreakState.FALSE; } // GB 11 ZWJ × (Glue_After_Zwj | EBG) - if (previous === Types.ZWJ && (current === Types.GLUE_AFTER_ZWJ || current === Types.E_BASE_GAZ)) { + if (previous === GraphemeTypes.ZWJ && (current === GraphemeTypes.GLUE_AFTER_ZWJ || current === GraphemeTypes.E_BASE_GAZ)) { return BreakState.FALSE; } // GB 12 sot (RI RI)* RI × RI // GB 13 [^RI] (RI RI)* RI × RI - if (previous === Types.REGIONAL_INDICATOR && current === Types.REGIONAL_INDICATOR) { + if (previous === GraphemeTypes.REGIONAL_INDICATOR && current === GraphemeTypes.REGIONAL_INDICATOR) { // return BreakState.False; return BreakState.REGIONAL_SECOND; } - if (previous === BreakState.REGIONAL_SECOND && current === Types.REGIONAL_INDICATOR) { + if (previous === BreakState.REGIONAL_SECOND && current === GraphemeTypes.REGIONAL_INDICATOR) { return BreakState.TRUE; } // GB 999 return BreakState.TRUE; } + +export class GraphemeClusterIterator { + public wcwidth: number = 0; + private _wcwidth: number = 0; + public breakPosition: number = -1; + constructor( + public data: string, + public current: number, + public end: number, + public lastType: GraphemeTypes | BreakState = GraphemeTypes.CONTROL + ) {} + public next(): void { + this.wcwidth = this._wcwidth; + this._wcwidth = 0; + for (let i = this.current; i < this.end; ++i) { + let code = this.data.charCodeAt(i) | 0; + if (0xD800 <= code && code <= 0xDBFF) { + i++; + let low = this.data.charCodeAt(i); + if (isNaN(low)) { + this.lastType = BreakState.SURROGATE; + this.breakPosition = -1; + this.current = this.end; + i = this.end; + return; + } + code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000; + } + this._wcwidth += wcwidth(code); + let currentType: GraphemeTypes | BreakState = graphemeType(code); + let breakState = canBreak(currentType, this.lastType); + if (breakState === BreakState.TRUE) { + this.breakPosition = (code > 65535) ? i - 1 : i; + this.current = i + 1; + this.lastType = currentType; + return; + } + if (breakState === BreakState.REGIONAL_SECOND) { + this.lastType = BreakState.REGIONAL_SECOND; + } else if (breakState === BreakState.EMOJI_EXTEND) { + this.lastType = BreakState.EMOJI_EXTEND; + } + } + this.wcwidth = this._wcwidth; + this.current = this.end + 1; + this.breakPosition = this.end; + } +} \ No newline at end of file diff --git a/src/InputHandler.ts b/src/InputHandler.ts index 7b12b6de13..c3862f4d01 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -11,6 +11,7 @@ import { CHAR_DATA_CHAR_INDEX, CHAR_DATA_WIDTH_INDEX, CHAR_DATA_CODE_INDEX } fro import { FLAGS } from './renderer/Types'; import { wcwidth } from './CharWidth'; import { EscapeSequenceParser } from './EscapeSequenceParser'; +import { GraphemeClusterIterator } from './Grapheme'; /** * Map collect to glevel. Used in `selectCharset`. @@ -307,6 +308,128 @@ export class InputHandler implements IInputHandler { } public print(data: string, start: number, end: number): void { + // let s = '😜🇺🇸👍🇺🇸🇺🇸'; + // let s = "Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞"; + // let s = "Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞"; + // let s = "עברית"; + // let s = '😜🇺🇸👍🇺🇸🇺🇸'; + // let s = "अनुच्छेद"; + // let s = "🌷🎁💩😜👍🏳️‍🌈"; + // let s = "뎌쉐"; + + /* + const test = new GraphemeClusterIterator(s, 0, s.length); + let carry = 0; + do { + test.next(); + console.log([s.substring(carry, test.breakPosition), test.wcwidth, test.breakPosition]); + carry = test.breakPosition; + } while (test.current < test.end); + */ + + const buffer: IBuffer = this._terminal.buffer; + const charset: ICharset = this._terminal.charset; + const screenReaderMode: boolean = this._terminal.options.screenReaderMode; + const cols: number = this._terminal.cols; + const wraparoundMode: boolean = this._terminal.wraparoundMode; + const insertMode: boolean = this._terminal.insertMode; + const curAttr: number = this._terminal.curAttr; + let bufferRow = buffer.lines.get(buffer.y + buffer.ybase); + + this._terminal.updateRange(buffer.y); + + + // FIXME: end + 1 - may lead to errors? + const it = new GraphemeClusterIterator(data, start, end + 1); + let lastBreak = start; + do { + it.next(); + + if (it.breakPosition === -1) continue; + if (lastBreak === it.breakPosition) continue; + + let chWidth = it.wcwidth; + let char = (it.breakPosition - lastBreak === 1) + ? data.charAt(lastBreak) + : data.substring(lastBreak, it.breakPosition); + // console.log(char, chWidth); + + if (charset) { + char = charset[char] || char; + } + + if (screenReaderMode) { + this._terminal.emit('a11y.char', char); + } + + // goto next line if ch would overflow + // TODO: needs a global min terminal width of 2 + if (buffer.x + chWidth - 1 >= cols) { + // autowrap - DECAWM + // automatically wraps to the beginning of the next line + if (wraparoundMode) { + buffer.x = 0; + buffer.y++; + if (buffer.y > buffer.scrollBottom) { + buffer.y--; + this._terminal.scroll(true); + } else { + // The line already exists (eg. the initial viewport), mark it as a + // wrapped line + (buffer.lines.get(buffer.y)).isWrapped = true; + } + // row changed, get it again + bufferRow = buffer.lines.get(buffer.y + buffer.ybase); + } else { + if (chWidth === 2) { + // FIXME: check for xterm behavior + // What to do here? We got a wide char that does not fit into last cell + continue; + } + // FIXME: Do we have to set buffer.x to cols - 1, if not wrapping? + } + } + + // insert mode: move characters to right + // To achieve insert, we remove cells from the right + // and insert empty ones at cursor position + if (insertMode) { + // do this twice for a fullwidth char + for (let moves = 0; moves < chWidth; ++moves) { + // remove last cell + // if it's width is 0, we have to adjust the second last cell as well + let removed = bufferRow.pop(); + if (removed[CHAR_DATA_WIDTH_INDEX] === 0 + && bufferRow[this._terminal.cols - 2] + && bufferRow[this._terminal.cols - 2][CHAR_DATA_WIDTH_INDEX] === 2) { + bufferRow[this._terminal.cols - 2] = [curAttr, ' ', 1, 32 /* ' '.charCodeAt(0) */ ]; + } + + // insert empty cell at cursor + bufferRow.splice(buffer.x, 0, [curAttr, ' ', 1, 32 /* ' '.charCodeAt(0) */ ]); + } + } + + // write current char to buffer and advance cursor + // use char cache only for char.length === 1 + bufferRow[buffer.x++] = [curAttr, char, chWidth, (char.length === 1) ? char.charCodeAt(0) : 65535]; + + // fullwidth char - also set next cell to placeholder stub and advance cursor + if (chWidth === 2) { + bufferRow[buffer.x++] = [curAttr, '', 0, undefined]; + } + + + + lastBreak = it.breakPosition; + } while (it.current < it.end); + + this._terminal.updateRange(buffer.y); + + // this.print_(data, start, end); + } + + public print_(data: string, start: number, end: number): void { let char: string; let code: number; let low: number; @@ -435,6 +558,7 @@ export class InputHandler implements IInputHandler { if (chWidth === 2) { bufferRow[buffer.x++] = [curAttr, '', 0, undefined]; } + } this._terminal.updateRange(buffer.y); }