diff --git a/bin/create-graphemedata.js b/bin/create-graphemedata.js
new file mode 100755
index 0000000000..3cdb5f9e9b
--- /dev/null
+++ b/bin/create-graphemedata.js
@@ -0,0 +1,208 @@
+#!/usr/bin/env node
+'use strict';
+
+/**
+ * Downloads the Unicode 10.0.0 GraphemeBreakProperty table and writes the
+ * packed lookup data as TypeScript module to src/GraphemeData.ts.
+ */
+
+const URL = 'https://www.unicode.org/Public/10.0.0/ucd/auxiliary/GraphemeBreakProperty.txt';
+const PATH = __dirname + '/../src/GraphemeData.ts';
+const PLANE_SIZE = 65536;
+
+// matches "XXXX ; Type" and "XXXX..YYYY ; Type" rows of the property table
+const GRAPHEME_REX = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm;
+
+// numeric codes of the property values, same numbering as GraphemeTypes in src/Grapheme.ts
+const TYPES = {
+  Other: 0,
+  L: 1,
+  V: 2,
+  T: 3,
+  LV: 4,
+  LVT: 5,
+  CR: 6,
+  LF: 7,
+  ZWJ: 8,
+  Prepend: 9,
+  Control: 10,
+  Extend: 11,
+  SpacingMark: 12,
+  E_Base: 13,
+  Glue_After_Zwj: 14,
+  E_Modifier: 15,
+  E_Base_GAZ: 16,
+  Regional_Indicator: 17
+};
+
+/**
+ * Returns the numeric code of a property value name.
+ * `undefined` (code point not listed in the table) maps to Other.
+ * Throws for names missing in TYPES, they would end up as NaN in the packed data.
+ */
+function typeCode(name) {
+  if (name === undefined) {
+    return TYPES.Other;
+  }
+  if (!Object.prototype.hasOwnProperty.call(TYPES, name)) {
+    throw new Error(`unknown grapheme break property value: ${name}`);
+  }
+  return TYPES[name];
+}
+
+/**
+ * Parses the content of GraphemeBreakProperty.txt.
+ * Returns a prototype-less object mapping code point --> property value name.
+ */
+function parseDefinitions(data) {
+  const codepoints = Object.create(null);
+  // the regexp is global and keeps state, always scan from the beginning
+  GRAPHEME_REX.lastIndex = 0;
+  let match = GRAPHEME_REX.exec(data);
+  while (match !== null) {
+    const start = parseInt(match[1], 16);
+    // rows with a single code point have no range end
+    const end = (match[2] === undefined) ? start : parseInt(match[2], 16);
+    for (let cp = start; cp <= end; ++cp) {
+      codepoints[cp] = match[3];
+    }
+    match = GRAPHEME_REX.exec(data);
+  }
+  return codepoints;
+}
+
+/**
+ * Run length encodes the types of the code points in [start, end).
+ * Layout of the bytes behind the returned base64 string:
+ *   - one length byte (1..255) per run
+ *   - a 0 byte terminating the lengths
+ *   - the run types, two 4 bit codes per byte (even run in the high nibble)
+ * Throws for type codes above 15, they cannot be stored in 4 bits.
+ */
+function createPackedBMP(codepoints, start, end) {
+  const lengths = [];
+  const types = [];
+  let runType = -1;
+  let runLength = 0;
+  for (let cp = start; cp < end; ++cp) {
+    const type = typeCode(codepoints[cp]);
+    if (type > 15) {
+      throw new Error(`type ${type} of code point ${cp} does not fit into 4 bits`);
+    }
+    // close the run on a type change or when the length byte is exhausted
+    if (type !== runType || runLength === 255) {
+      if (runLength) {
+        lengths.push(runLength);
+        types.push(runType);
+      }
+      runType = type;
+      runLength = 0;
+    }
+    runLength++;
+  }
+  if (runLength) {
+    lengths.push(runLength);
+    types.push(runType);
+  }
+
+  // two types share one byte, pad to an even number
+  if (types.length & 1) {
+    types.push(0);
+  }
+  const packedTypes = [];
+  for (let i = 0; i < types.length; i += 2) {
+    packedTypes.push((types[i] << 4) | types[i + 1]);
+  }
+
+  // null terminate length values
+  lengths.push(0);
+  return Buffer.from(lengths.concat(packedTypes)).toString('base64');
+}
+
+/**
+ * Packs the runs of non Other code points of a supplementary plane.
+ * start and end are offsets into the plane, the packed range is [start, end).
+ * Every run takes 4 bytes in the data behind the returned base64 string:
+ *   [offset high byte, offset low byte, run length (1..255), type]
+ */
+function createPackedHIGH(codepoints, plane, start, end) {
+  const planeStart = PLANE_SIZE * plane;
+  const bytes = [];
+  let runStart = start;
+  let runType = TYPES.Other;
+  let runLength = 0;
+  const flushRun = () => {
+    // only store non Other runs
+    if (runLength && runType !== TYPES.Other) {
+      bytes.push((runStart >> 8) & 255, runStart & 255, runLength, runType);
+    }
+  };
+  for (let offset = start; offset < end; ++offset) {
+    const type = typeCode(codepoints[planeStart + offset]);
+    if (type !== runType || runLength === 255) {
+      flushRun();
+      runStart = offset;
+      runType = type;
+      runLength = 0;
+    }
+    runLength++;
+  }
+  flushRun();
+  return Buffer.from(bytes).toString('base64');
+}
+
+/**
+ * Fetches the property table from url and writes the generated module to path.
+ * Failures are reported on stderr and set a non zero exit code.
+ */
+function createGraphemeDataFile(url, path) {
+  require('https').get(url, (resp) => {
+    if (resp.statusCode !== 200) {
+      // drop the body, an error page must not be turned into lookup data
+      resp.resume();
+      console.error('error', `unexpected HTTP status ${resp.statusCode}`);
+      process.exitCode = 1;
+      return;
+    }
+    resp.setEncoding('utf8');
+    let data = '';
+    resp.on('data', (chunk) => {
+      data += chunk;
+    });
+    resp.on('end', () => {
+      const codepoints = parseDefinitions(data);
+      let highest = 0;
+      for (const key in codepoints) {
+        highest = Math.max(highest, parseInt(key, 10));
+      }
+
+      // codepoint < 12443
+      const first = createPackedBMP(codepoints, 0, 12443);
+      // 42606 <= codepoint < 65536
+      const second = createPackedBMP(codepoints, 42606, 65536);
+      // Supplementary Multilingual Plane (1): 0 <= codepoint < 63966
+      const third = createPackedHIGH(codepoints, 1, 0, 63966);
+      // Supplementary Special-purpose Plane (14): 0 <= codepoint < highest + 1
+      // highest is an absolute code point, createPackedHIGH wants plane offsets
+      const fourth = createPackedHIGH(codepoints, 14, 0, highest + 1 - 14 * PLANE_SIZE);
+
+      // write to ts file
+      const lines = [
+        '// FIRST: 0 <= codepoint < 12443',
+        `export const FIRST: string = '${first}';`,
+        '// SECOND: 42606 <= codepoint < 65536',
+        `export const SECOND: string = '${second}';`,
+        '// THIRD: Supplementary Multilingual Plane (1) 0 <= codepoint < 63966',
+        `export const THIRD: string = '${third}';`,
+        '// FOURTH: Supplementary Special-purpose Plane (14) 0 <= codepoint <= highest',
+        `export const FOURTH: string = '${fourth}';`
+      ];
+      require('fs').writeFileSync(path, lines.join('\n') + '\n');
+    });
+  }).on('error', (err) => {
+    console.error('error', err.message);
+    process.exitCode = 1;
+  });
+}
+
+createGraphemeDataFile(URL, PATH);
diff --git a/package.json b/package.json
index 44cc2e88d2..3a62dc5029 100644
--- a/package.json
+++ b/package.json
@@ -55,6 +55,7 @@
     "prepublish": "npm run build",
     "coveralls": "gulp coveralls",
     "webpack": "gulp webpack",
-    "watch": "gulp watch"
+    "watch": "gulp watch",
+    "graphemedata": "node bin/create-graphemedata"
   }
 }
diff --git a/src/Grapheme.test.ts b/src/Grapheme.test.ts
new file mode 100644
index 0000000000..13b0c1c68e
--- /dev/null
+++ b/src/Grapheme.test.ts
@@ -0,0 +1,159 @@
+/**
+ * Copyright (c) 2018 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+
+import { graphemeType, canBreak, BreakState, GraphemeTypes } from './Grapheme';
+import * as chai from 'chai';
+
+// numeric codes of the property values, same numbering as in bin/create-graphemedata.js
+const _TYPES = {
+  Other: 0,
+  L: 1,
+  V: 2,
+  T: 3,
+  LV: 4,
+  LVT: 5,
+  CR: 6,
+  LF: 7,
+  ZWJ: 8,
+  Prepend: 9,
+  Control: 10,
+  Extend: 11,
+  SpacingMark: 12,
+  E_Base: 13,
+  Glue_After_Zwj: 14,
+  E_Modifier: 15,
+  E_Base_GAZ: 16,
+  Regional_Indicator: 17
+};
+
+const URL = 'https://www.unicode.org/Public/10.0.0/ucd/auxiliary/GraphemeBreakProperty.txt';
+const GRAPHEME_REX = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)/gm;
+
+// code point --> property value name, filled by loadUnicodeData
+let CODEPOINTS = null;
+
+/**
+ * Parses the content of GraphemeBreakProperty.txt.
+ * Returns a mapping code point --> property value name.
+ */
+function parseDefinitions(data: string): {[key: number]: string} {
+  const codepoints = Object.create(null);
+  // the regexp is global and keeps state, always scan from the beginning
+  GRAPHEME_REX.lastIndex = 0;
+  let match = GRAPHEME_REX.exec(data);
+  while (match !== null) {
+    const start = parseInt(match[1], 16);
+    const end = (match[2] === undefined) ? start : parseInt(match[2], 16);
+    for (let cp = start; cp <= end; ++cp) codepoints[cp] = match[3];
+    match = GRAPHEME_REX.exec(data);
+  }
+  return codepoints;
+}
+
+/**
+ * Fetches the reference table into CODEPOINTS and calls done afterwards.
+ * A fetch error is handed over to done to let the suite fail visibly.
+ */
+function loadUnicodeData(done: Function): void {
+  require('https').get(URL, (resp): any => {
+    let data = '';
+    resp.on('data', (chunk): void => {
+      data += chunk;
+    });
+    resp.on('end', () => {
+      CODEPOINTS = parseDefinitions(data);
+      done();
+    });
+  }).on('error', (err) => {
+    done(new Error('error fetching unicode data: ' + err.message));
+  });
+}
+
+/**
+ * Compares graphemeType with the reference table for start <= codepoint < end.
+ */
+function expectTypesInRange(start: number, end: number): void {
+  for (let cp = start; cp < end; ++cp) {
+    chai.expect(graphemeType(cp)).equals(_TYPES[CODEPOINTS[cp]] || 0);
+  }
+}
+
+describe('grapheme cluster', function (): void {
+  before(function(done: Function): void {
+    this.timeout(5000);
+    loadUnicodeData(done);
+  });
+  describe('graphemeType', function(): void {
+    it('BMP (0)', function(): void {
+      expectTypesInRange(0, 65536);
+    });
+    it('SMP (1)', function(): void {
+      expectTypesInRange(65536, 2 * 65536);
+    });
+    it('SSP (14)', function(): void {
+      expectTypesInRange(14 * 65536, 15 * 65536);
+    });
+  });
+  describe('break rules', function(): void {
+    it('GB 3', function(): void {
+      chai.expect(canBreak(GraphemeTypes.LF, GraphemeTypes.CR)).equals(BreakState.FALSE);
+    });
+    it('GB 4', function(): void { // TODO: test all states
+      const types = [GraphemeTypes.CONTROL, GraphemeTypes.CR, GraphemeTypes.LF];
+      for (const type of types) {
+        chai.expect(canBreak(GraphemeTypes.OTHER, type)).equals(BreakState.TRUE);
+      }
+    });
+    it('GB 5', function(): void { // TODO: test all states
+      const types = [GraphemeTypes.CONTROL, GraphemeTypes.CR, GraphemeTypes.LF];
+      for (const type of types) {
+        chai.expect(canBreak(type, GraphemeTypes.OTHER)).equals(BreakState.TRUE);
+      }
+    });
+    it('GB 6', function(): void {
+      const types = [GraphemeTypes.L, GraphemeTypes.V, GraphemeTypes.LV, GraphemeTypes.LVT];
+      for (const type of types) {
+        chai.expect(canBreak(type, GraphemeTypes.L)).equals(BreakState.FALSE);
+      }
+    });
+    it('GB 7', function(): void {
+      chai.expect(canBreak(GraphemeTypes.V, GraphemeTypes.LV)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.T, GraphemeTypes.LV)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.V, GraphemeTypes.V)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.T, GraphemeTypes.V)).equals(BreakState.FALSE);
+    });
+    it('GB 8', function(): void {
+      chai.expect(canBreak(GraphemeTypes.T, GraphemeTypes.LVT)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.T, GraphemeTypes.T)).equals(BreakState.FALSE);
+    });
+    it('GB 9', function(): void {
+      chai.expect(canBreak(GraphemeTypes.EXTEND, GraphemeTypes.OTHER)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.ZWJ, GraphemeTypes.OTHER)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.EXTEND, GraphemeTypes.E_BASE)).equals(BreakState.EMOJI_EXTEND);
+      chai.expect(canBreak(GraphemeTypes.ZWJ, GraphemeTypes.E_BASE)).equals(BreakState.EMOJI_EXTEND); // wrong here?
+      chai.expect(canBreak(GraphemeTypes.EXTEND, GraphemeTypes.E_BASE_GAZ)).equals(BreakState.EMOJI_EXTEND);
+      chai.expect(canBreak(GraphemeTypes.ZWJ, GraphemeTypes.E_BASE_GAZ)).equals(BreakState.EMOJI_EXTEND); // wrong here?
+    });
+    it('GB 9a', function(): void {
+      chai.expect(canBreak(GraphemeTypes.SPACINGMARK, GraphemeTypes.OTHER)).equals(BreakState.FALSE);
+    });
+    it('GB 9b', function(): void {
+      chai.expect(canBreak(GraphemeTypes.OTHER, GraphemeTypes.PREPEND)).equals(BreakState.FALSE);
+    });
+    it('GB 10', function(): void {
+      chai.expect(canBreak(GraphemeTypes.E_MODIFIER, GraphemeTypes.E_BASE)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.E_MODIFIER, GraphemeTypes.E_BASE_GAZ)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.E_MODIFIER, BreakState.EMOJI_EXTEND)).equals(BreakState.FALSE);
+    });
+    it('GB 11', function(): void {
+      chai.expect(canBreak(GraphemeTypes.GLUE_AFTER_ZWJ, GraphemeTypes.ZWJ)).equals(BreakState.FALSE);
+      chai.expect(canBreak(GraphemeTypes.E_BASE_GAZ, GraphemeTypes.ZWJ)).equals(BreakState.FALSE);
+    });
+    it('GB 12 & 13', function(): void {
+      chai.expect(canBreak(GraphemeTypes.REGIONAL_INDICATOR, GraphemeTypes.REGIONAL_INDICATOR)).equals(BreakState.REGIONAL_SECOND);
+      chai.expect(canBreak(GraphemeTypes.REGIONAL_INDICATOR, BreakState.REGIONAL_SECOND)).equals(BreakState.TRUE);
+    });
+  });
+});
diff --git a/src/Grapheme.ts b/src/Grapheme.ts
new file mode 100644
index 0000000000..9e6cf26034
--- /dev/null
+++ b/src/Grapheme.ts
@@ -0,0 +1,265 @@
+/**
+ * Copyright (c) 2018 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+import { FIRST, SECOND, THIRD, FOURTH } from './GraphemeData';
+import { wcwidth } from './CharWidth';
+
+export const enum GraphemeTypes {
+  OTHER = 0,
+  L = 1,
+  V = 2,
+  T = 3,
+  LV = 4,
+  LVT = 5,
+  CR = 6,
+  LF = 7,
+  ZWJ = 8,
+  PREPEND = 9,
+  CONTROL = 10,
+  EXTEND = 11,
+  SPACINGMARK = 12,
+  E_BASE = 13,
+  GLUE_AFTER_ZWJ = 14,
+  E_MODIFIER = 15,
+  E_BASE_GAZ = 16,
+  REGIONAL_INDICATOR = 17,
+  ILLEGAL = 31
+}
+
+/**
+ * Decodes a base64 string into a binary string with one char per byte.
+ */
+function decodeBase64(data: string): string {
+  return (typeof atob === 'undefined')
+    // nodejs
+    ? Buffer.from(data, 'base64').toString('binary')
+    // browser - FIXME: how to test this?
+    : atob(data);
+}
+
+/**
+ * Unpacks run length encoded type data for start <= codepoint < end.
+ * The data holds one length byte per run, a 0 byte as terminator and the run
+ * types with two 4 bit codes per byte (even run in the high nibble).
+ * The returned table stores two types per entry: even offsets from start in
+ * the low nibble, odd offsets in the high nibble.
+ */
+function loadFromPackedBMP(data: string, start: number, end: number): number[] | Uint8Array {
+  const raw = decodeBase64(data);
+  // first occurrence of 0x0 marks end of lengths (null terminated)
+  const terminator = raw.indexOf('\x00');
+  const lengths = raw.substring(0, terminator);
+  const types = raw.substring(terminator + 1);
+
+  // lookup table with 2 type entries per index position
+  const size = ((end - start) >> 1) + 1;
+  const table = (typeof Uint8Array === 'undefined') ? new Array(size) : new Uint8Array(size);
+
+  // load data into lookup table
+  let offset = 0;
+  for (let run = 0; run < lengths.length; ++run) {
+    const runEnd = offset + lengths.charCodeAt(run);
+    // the type is the same for the whole run, decode it once
+    const packed = types.charCodeAt(run >> 1);
+    const type = (run & 1) ? packed & 15 : packed >> 4;
+    for (; offset < runEnd; ++offset) {
+      table[offset >> 1] |= (offset & 1) ? type << 4 : type;
+    }
+  }
+  return table;
+}
+
+/**
+ * Unpacks the runs of a supplementary plane into lookupObj (codepoint --> type).
+ * Every run is stored as 4 bytes:
+ *   [offset high byte, offset low byte, run length, type]
+ */
+function loadFromPackedHIGH(lookupObj: any, data: string, plane: number): void {
+  const raw = decodeBase64(data);
+  for (let i = 0; i < raw.length; i += 4) {
+    const first = (raw.charCodeAt(i) << 8) + raw.charCodeAt(i + 1) + 65536 * plane;
+    const end = first + raw.charCodeAt(i + 2);
+    const type = raw.charCodeAt(i + 3);
+    for (let cp = first; cp < end; ++cp) lookupObj[cp] = type;
+  }
+}
+
+/**
+ * Returns the grapheme cluster break type of codepoint.
+ * The lookup tables are unpacked lazily, when their range is hit for the first time.
+ */
+export const graphemeType = (function(): (codepoint: number) => GraphemeTypes {
+  let BMP_LOW = null;
+  let BMP_HIGH = null;
+  let HIGH = null;
+  return (codepoint: number): GraphemeTypes => {
+    // ASCII printable shortcut
+    if (31 < codepoint && codepoint < 127) return GraphemeTypes.OTHER;
+
+    // BMP_LOW: 0 <= codepoint < 12443
+    if (codepoint < 12443) {
+      if (!BMP_LOW) BMP_LOW = loadFromPackedBMP(FIRST, 0, 12443);
+      return (codepoint & 1) ? BMP_LOW[codepoint >> 1] >> 4 : BMP_LOW[codepoint >> 1] & 15;
+    }
+
+    // always Other: 12443 <= codepoint < 42606
+    if (codepoint < 42606) return GraphemeTypes.OTHER;
+
+    // BMP_HIGH (CJK): 42606 <= codepoint < 65536
+    if (codepoint < 65536) {
+      if (!BMP_HIGH) BMP_HIGH = loadFromPackedBMP(SECOND, 42606, 65536);
+      const offset = codepoint - 42606;
+      return (offset & 1) ? BMP_HIGH[offset >> 1] >> 4 : BMP_HIGH[offset >> 1] & 15;
+    }
+
+    // codepoint >= 65536
+    // 129502 highest in SMP (Plane 1)
+    // 917504 lowest in SSP (Plane 14)
+    // 921599 highest in SSP
+    if (codepoint < 129503 || (917504 <= codepoint && codepoint < 921600)) {
+      if (!HIGH) {
+        HIGH = Object.create(null);
+        loadFromPackedHIGH(HIGH, THIRD, 1);
+        loadFromPackedHIGH(HIGH, FOURTH, 14);
+      }
+      return HIGH[codepoint] || GraphemeTypes.OTHER;
+    }
+
+    // all other codepoints default to Other
+    return GraphemeTypes.OTHER;
+  };
+})();
+
+export const enum BreakState {
+  FALSE = 32,
+  TRUE = 33,
+  EMOJI_EXTEND = 34,    // does not break
+  REGIONAL_SECOND = 35, // does not break
+  SURROGATE = 36        // does not break
+}
+
+/**
+ * Applies the grapheme cluster boundary rules GB 3 - GB 999 of UAX #29 to a
+ * pair of adjacent types. Note the argument order: current comes first.
+ * Besides TRUE and FALSE two non breaking states are returned, which the caller
+ * may hand back as previous for the next codepoint:
+ *   - EMOJI_EXTEND: Extend or ZWJ after an emoji base (GB 10)
+ *   - REGIONAL_SECOND: second regional indicator of a pair (GB 12 & 13)
+ */
+export function canBreak(current: GraphemeTypes | BreakState, previous: GraphemeTypes | BreakState): BreakState {
+  if (previous === GraphemeTypes.OTHER && current === GraphemeTypes.OTHER) {
+    return BreakState.TRUE;
+  }
+  // GB 1 (sot ÷ Any) and GB 2 (Any ÷ eot) are handled at caller level
+
+  // GB 3 CR × LF
+  if (previous === GraphemeTypes.CR && current === GraphemeTypes.LF) {
+    return BreakState.FALSE;
+  }
+
+  // GB 4 (Control | CR | LF) ÷
+  if (previous === GraphemeTypes.CONTROL || previous === GraphemeTypes.CR || previous === GraphemeTypes.LF) {
+    return BreakState.TRUE;
+  }
+
+  // GB 5 ÷ (Control | CR | LF)
+  if (current === GraphemeTypes.CONTROL || current === GraphemeTypes.CR || current === GraphemeTypes.LF) {
+    return BreakState.TRUE;
+  }
+
+  // GB 6 L × (L | V | LV | LVT)
+  if (previous === GraphemeTypes.L && (current === GraphemeTypes.L || current === GraphemeTypes.V || current === GraphemeTypes.LV || current === GraphemeTypes.LVT)) {
+    return BreakState.FALSE;
+  }
+
+  // GB 7 (LV | V) × (V | T)
+  if ((previous === GraphemeTypes.LV || previous === GraphemeTypes.V) && (current === GraphemeTypes.V || current === GraphemeTypes.T)) {
+    return BreakState.FALSE;
+  }
+
+  // GB 8 (LVT | T) × T
+  if ((previous === GraphemeTypes.LVT || previous === GraphemeTypes.T) && current === GraphemeTypes.T) {
+    return BreakState.FALSE;
+  }
+
+  // GB 9 × (Extend | ZWJ)
+  if (current === GraphemeTypes.EXTEND || current === GraphemeTypes.ZWJ) {
+    if (previous === GraphemeTypes.E_BASE || previous === GraphemeTypes.E_BASE_GAZ) {
+      return BreakState.EMOJI_EXTEND;
+    }
+    return BreakState.FALSE;
+  }
+
+  // GB 9a × SpacingMark
+  if (current === GraphemeTypes.SPACINGMARK) {
+    return BreakState.FALSE;
+  }
+
+  // GB 9b Prepend ×
+  if (previous === GraphemeTypes.PREPEND) {
+    return BreakState.FALSE;
+  }
+
+  // GB 10 (E_Base | EBG) Extend* × E_Modifier
+  if (current === GraphemeTypes.E_MODIFIER
+      && (previous === GraphemeTypes.E_BASE || previous === GraphemeTypes.E_BASE_GAZ || previous === BreakState.EMOJI_EXTEND)) {
+    return BreakState.FALSE;
+  }
+
+  // GB 11 ZWJ × (Glue_After_Zwj | EBG)
+  if (previous === GraphemeTypes.ZWJ && (current === GraphemeTypes.GLUE_AFTER_ZWJ || current === GraphemeTypes.E_BASE_GAZ)) {
+    return BreakState.FALSE;
+  }
+
+  // GB 12 sot (RI RI)* RI × RI
+  // GB 13 [^RI] (RI RI)* RI × RI
+  if (previous === GraphemeTypes.REGIONAL_INDICATOR && current === GraphemeTypes.REGIONAL_INDICATOR) {
+    return BreakState.REGIONAL_SECOND;
+  }
+  // an RI after a complete pair (previous === REGIONAL_SECOND) breaks by GB 999
+
+  // GB 999
+  return BreakState.TRUE;
+}
+
+/**
+ * Steps through data cluster by cluster. Every next() call scans up to the
+ * next cluster boundary and sets:
+ *   - breakPosition: index of the boundary (-1 for an unfinished surrogate pair)
+ *   - wcwidth: summed width of the codepoints scanned by the previous call
+ *   - current: index to continue from
+ * With the default lastType the first call reports the boundary in front of
+ * the first codepoint.
+ * NOTE(review): the width of codepoints that extend a cluster ends up in the
+ * wcwidth of the following cluster - fine for zero width marks, looks wrong
+ * for wide ones (emoji modifiers, second regional indicator).
+ */
+export class GraphemeClusterIterator {
+  public wcwidth: number = 0;
+  private _wcwidth: number = 0;
+  public breakPosition: number = -1;
+  constructor(
+    public data: string,
+    public current: number,
+    public end: number,
+    public lastType: GraphemeTypes | BreakState = GraphemeTypes.CONTROL
+  ) {}
+
+  public next(): void {
+    this.wcwidth = this._wcwidth;
+    this._wcwidth = 0;
+    for (let i = this.current; i < this.end; ++i) {
+      // | 0 turns the NaN of a read behind the end of data into 0
+      let code = this.data.charCodeAt(i) | 0;
+      if (0xD800 <= code && code <= 0xDBFF) {
+        const low = this.data.charCodeAt(++i);
+        if (isNaN(low)) {
+          // data ends within a surrogate pair
+          this.lastType = BreakState.SURROGATE;
+          this.breakPosition = -1;
+          this.current = this.end;
+          return;
+        }
+        code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
+      }
+      this._wcwidth += wcwidth(code);
+      const currentType: GraphemeTypes | BreakState = graphemeType(code);
+      const breakState = canBreak(currentType, this.lastType);
+      if (breakState === BreakState.TRUE) {
+        this.breakPosition = (code > 65535) ? i - 1 : i;
+        this.current = i + 1;
+        this.lastType = currentType;
+        return;
+      }
+      // no break - track what precedes the next codepoint. Keeping the old
+      // lastType here would apply the rules to the wrong pair, e.g. everything
+      // after a Prepend would stick to it and V × T (GB 7) would get missed.
+      if (breakState === BreakState.REGIONAL_SECOND) {
+        this.lastType = BreakState.REGIONAL_SECOND;
+      } else if (currentType === GraphemeTypes.EXTEND
+          && (breakState === BreakState.EMOJI_EXTEND || this.lastType === BreakState.EMOJI_EXTEND)) {
+        // GB 10: only Extend may sit between emoji base and modifier
+        this.lastType = BreakState.EMOJI_EXTEND;
+      } else {
+        // includes ZWJ after an emoji base, GB 11 needs ZWJ as previous type
+        this.lastType = currentType;
+      }
+    }
+    this.wcwidth = this._wcwidth;
+    this.current = this.end + 1;
+    this.breakPosition = this.end;
+  }
+}
diff --git a/src/GraphemeData.ts b/src/GraphemeData.ts
new file mode 100644
index 0000000000..252631a9ad
--- /dev/null
+++ b/src/GraphemeData.ts
@@ -0,0 +1,8 @@
+// FIRST: 0 <= codepoint < 12443
+export const FIRST: string = 
'CgECARJfIQ0B//9UcP8UB/8ILQEBAQIBAgEBOAYKCwEBLhUQAWUHAQEGAgIBBCEBAQEeG1sLOgkiBAEJAQMBBSsDeA4BIAE2AQEBAQMIBAECAQcKAh0BAjgBAQECBAICAgIBCQEKAh0CATgBAQMCBAICAwMBHgIDAQsCATgBAQMFAQIBAQIBFAIWBgEBAjgBAQIBBAICAgIBCAIKAh4BOwEBAQIDAwEDAQkBKAEDOgMEAQMBBAcCCwIdAQI4AQEBAQIBAgEBAgECAgcCCwIcAgI3AgEBAgQBAwEDAQEIAQoCHgJGAQQBAgMBAQEHARICPQEBAQcMCGIBAQEGAQILBkoCGwEBAQEBBAIxDgEFAQIFCwEkCQFmBAEGAQICAhcCAgQDEAQNAQEBAgYBDwFiYEhY/14D////tQMdAx0CHgJAAgEHCAECCwkBLQMBdgIiAXYDBAIDBAIBBgPbAgIBOQEBAQcBAQEBAggGCgIBMA9BBAEvAQEFAQEFAQImCQwCAR4BBAICAQM4AQECAwEBAwIwCAgCApgDAQ0BBwQBBAIBAgECxjoBBf//DQEBAQIYBzEQYCH//////zEBIgEBAVICYgEOAQEEVgH//////48DjQFgIP//LAZpAgCnpqCgoACwCwCwsLCwsJCwoLCwuQsLCwkLCwsLCwsLCwsLm8C8sMvLwLCwvAsLywwMsLCwvAsMsLCwsLCwvAsMsLwMsLCwvAsLywwMsLCwsLy8DAywsLwLwLCwsLC8Cwy8vAvAywsLC8CwvLDAy5CwsMCwvLCwywwLDLCwsMsLCwsLCwsMC8sLCwsLC8sLywywsLCwywsLASMAsAALCwsLC8vLywsLoLCwvLwMvLC8sMvLCwsLywsLC8C8vLy8CwvAy8vLC8vLy8DLywsLywsMsMsLCwAKuKCgoLAAAA0ODg4NDg0OAAAAsLCwALCw'; +// SECOND: 42606 <= codepoint < 65536 +export const SECOND: string = 
'AQQBCiACUAL/EQEDAQQBFwICAVgCMhACGhI0CBkLAgwdAwMBLwECBAIBBCQBQwYCAgICDAEIAQEuATMBAQMCAgUCAQEpAQICBQEB7AIBAgECAQEBEgEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwEbARsBGwwXBDEE//////////8I////////////////////////////////////OQH//+MQEBDPAZ4CUAwEAAsLCwsAsLCwy8DAywsLC8AQvAvLy8CwvLywsLwLCwsLCwsMvAywy8vAywRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFRUVFAgMKqqqqqgAAAAAAAAAAAAAAAAAAsACwsKCwoA'; +// THIRD: Supplementary Multilingual Plane (1) 0 <= codepoint < 63966 +export const THIRD: string = 
'Af0BCwLgAQsDdgULCgEDCwoFAgsKDAQLCjgDCwo/AQsK5QILEAABDBABAQsQAgEMEDgPCxB/AwsQggEMELADDBCzBAsQtwIMELkCCxC9AQkRAAMLEScFCxEsAQwRLQgLEXMBCxGAAgsRggEMEbMDDBG2CQsRvwIMEcICCRHKAwsSLAMMEi8DCxIyAgwSNAELEjUBDBI2AgsSPgELEt8BCxLgAwwS4wgLEwACCxMCAgwTPAELEz4BCxM/AQwTQAELE0EEDBNHAgwTSwMME1cBCxNiAgwTZgcLE3AFCxQ1AwwUOAgLFEACDBRCAwsURQEMFEYBCxSwAQsUsQIMFLMGCxS5AQwUugELFLsCDBS9AQsUvgEMFL8CCxTBAQwUwgILFa8BCxWwAgwVsgQLFbgEDBW8AgsVvgEMFb8CCxXcAgsWMAMMFjMICxY7AgwWPQELFj4BDBY/AgsWqwELFqwBDBatAQsWrgIMFrAGCxa2AQwWtwELFx0DCxcgAgwXIgQLFyYBDBcnBQsaAQYLGgcCDBoJAgsaMwYLGjkBDBo6AQkaOwQLGkcBCxpRBgsaVwIMGlkDCxqGBAkaig0LGpcBDBqYAgscLwEMHDAHCxw4BgscPgEMHD8BCxySFgscqQEMHKoHCxyxAQwcsgILHLQBDBy1AgsdMQYLHToBCx08AgsdPwcLHUYBCR1HAQtq8AULazAHC29RLgxvjwQLvJ0CC7ygBArRZQEL0WYBDNFnAwvRbQEM0W4FC9FzCArRewgL0YUHC9GqBAvSQgML2gA3C9o7MgvadQEL2oQBC9qbBQvaoQ8L4AAHC+AIEQvgGwcL4CMCC+AmBQvo0AcL6UQHC/HmGhHzCAEO8z4BDvNzAQ7zhQEN85MBDvOkAQ7zqAEO88IDDfPHAQ3zygMN8+sBDvPtAQ7z+wUP9EICDfRGCw30ZgQQ9G4BDfRwCQ30fAEN9IEDDfSFAw30iwEO9KoBDfS7Ag71JwEO9SwBDvV0Ag31egEN9ZABDfWVAg316AEO9kUDDfZLBQ32gAEO9pIBDvajAQ32tAMN9sABDfbMAQ35GAUN+R4CDfkmAQ35MAoN+T0CDfnRDQ0='; +// FOURTH: Supplement­ary Special-purpose Plane (14) 0 <= codepoint <= highest +export const FOURTH: string = 'AAAgCgAgYAsAgIAKAQDwCwHw/woC7/8KA+7/CgTt/woF7P8KBuv/Cgfq/woI6f8KCej/Cgrn/woL5v8KDOX/Cg3k/woO4/8KD+IeCg=='; diff --git a/src/InputHandler.ts b/src/InputHandler.ts index 7b12b6de13..c3862f4d01 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -11,6 +11,7 @@ import { CHAR_DATA_CHAR_INDEX, CHAR_DATA_WIDTH_INDEX, CHAR_DATA_CODE_INDEX } fro import { FLAGS } from './renderer/Types'; import { wcwidth } from './CharWidth'; import { EscapeSequenceParser } from './EscapeSequenceParser'; +import { GraphemeClusterIterator } from './Grapheme'; /** * Map collect to glevel. Used in `selectCharset`. 
@@ -307,6 +308,107 @@ export class InputHandler implements IInputHandler {
   }
 
   public print(data: string, start: number, end: number): void {
+    // Writes the chunk of data delimited by start and end to the buffer, one
+    // grapheme cluster per cell (followed by a placeholder cell for clusters of
+    // width 2). Honors the charset, wraparound mode and insert mode.
+    const buffer: IBuffer = this._terminal.buffer;
+    const charset: ICharset = this._terminal.charset;
+    const screenReaderMode: boolean = this._terminal.options.screenReaderMode;
+    const cols: number = this._terminal.cols;
+    const wraparoundMode: boolean = this._terminal.wraparoundMode;
+    const insertMode: boolean = this._terminal.insertMode;
+    const curAttr: number = this._terminal.curAttr;
+    let bufferRow = buffer.lines.get(buffer.y + buffer.ybase);
+
+    this._terminal.updateRange(buffer.y);
+
+    // the iterator reports a cluster when it reaches the codepoint behind it,
+    // thus it has to run one position further than the chunk
+    // FIXME: end + 1 - may lead to errors?
+    const it = new GraphemeClusterIterator(data, start, end + 1);
+    let lastBreak = start;
+    do {
+      it.next();
+
+      // skip an unfinished surrogate pair and the boundary in front of the first cluster
+      if (it.breakPosition === -1 || it.breakPosition === lastBreak) continue;
+
+      const chWidth = it.wcwidth;
+      let char = data.substring(lastBreak, it.breakPosition);
+      // advance right away, the cluster is consumed even if it gets skipped below
+      lastBreak = it.breakPosition;
+
+      if (charset) {
+        char = charset[char] || char;
+      }
+
+      if (screenReaderMode) {
+        this._terminal.emit('a11y.char', char);
+      }
+
+      // goto next line if ch would overflow
+      // TODO: needs a global min terminal width of 2
+      if (buffer.x + chWidth - 1 >= cols) {
+        // autowrap - DECAWM
+        // automatically wraps to the beginning of the next line
+        if (wraparoundMode) {
+          buffer.x = 0;
+          buffer.y++;
+          if (buffer.y > buffer.scrollBottom) {
+            buffer.y--;
+            this._terminal.scroll(true);
+            // row changed, get it again
+            bufferRow = buffer.lines.get(buffer.y + buffer.ybase);
+          } else {
+            // The line already exists (eg. the initial viewport), mark it as a
+            // wrapped line. It has to be looked up with the same index as bufferRow.
+            bufferRow = buffer.lines.get(buffer.y + buffer.ybase);
+            (bufferRow as any).isWrapped = true;
+          }
+        } else {
+          if (chWidth === 2) {
+            // FIXME: check for xterm behavior
+            // What to do here? We got a wide char that does not fit into last cell
+            continue;
+          }
+          // FIXME: Do we have to set buffer.x to cols - 1, if not wrapping?
+        }
+      }
+
+      // insert mode: move characters to right
+      // To achieve insert, we remove cells from the right
+      // and insert empty ones at cursor position
+      if (insertMode) {
+        // do this twice for a fullwidth char
+        for (let moves = 0; moves < chWidth; ++moves) {
+          // remove last cell
+          // if its width is 0, we have to adjust the second last cell as well
+          const removed = bufferRow.pop();
+          if (removed[CHAR_DATA_WIDTH_INDEX] === 0
+              && bufferRow[cols - 2]
+              && bufferRow[cols - 2][CHAR_DATA_WIDTH_INDEX] === 2) {
+            bufferRow[cols - 2] = [curAttr, ' ', 1, 32 /* ' '.charCodeAt(0) */];
+          }
+
+          // insert empty cell at cursor
+          bufferRow.splice(buffer.x, 0, [curAttr, ' ', 1, 32 /* ' '.charCodeAt(0) */]);
+        }
+      }
+
+      // write current char to buffer and advance cursor
+      // use char cache only for char.length === 1
+      bufferRow[buffer.x++] = [curAttr, char, chWidth, (char.length === 1) ? char.charCodeAt(0) : 65535];
+
+      // fullwidth char - also set next cell to placeholder stub and advance cursor
+      if (chWidth === 2) {
+        bufferRow[buffer.x++] = [curAttr, '', 0, undefined];
+      }
+    } while (it.current < it.end);
+
+    this._terminal.updateRange(buffer.y);
+  }
+
+  public print_(data: string, start: number, end: number): void {
     let char: string;
     let code: number;
     let low: number;
@@ -435,6 +537,7 @@ export class InputHandler implements IInputHandler {
       if (chWidth === 2) {
         bufferRow[buffer.x++] = [curAttr, '', 0, undefined];
       }
+
     }
     this._terminal.updateRange(buffer.y);
   }