From b5559118bba442551759b072fb14ac41c993481f Mon Sep 17 00:00:00 2001 From: "Sakthipriyan Vairamani (thefourtheye)" Date: Sun, 29 Oct 2017 23:18:25 +0530 Subject: [PATCH 1/4] string_decoder: make write after end to reset Fixes: https://github.com/nodejs/node/issues/16564 When StringDecoder's `end` is called, it is no longer supposed to wait for the data. If a `write` call is made after `end`, then the decoder has to be flushed and treated as a brand new write request. This patch also introduces a new StringDecoder#reset method, which simply resets all the internal data. --- doc/api/string_decoder.md | 10 ++++++++ lib/string_decoder.js | 27 +++++++++++++++++---- test/parallel/test-string-decoder-end.js | 30 ++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/doc/api/string_decoder.md b/doc/api/string_decoder.md index cde81e6ae5ec2b..748d85519b919f 100644 --- a/doc/api/string_decoder.md +++ b/doc/api/string_decoder.md @@ -82,3 +82,13 @@ Returns a decoded string, ensuring that any incomplete multibyte characters at the end of the `Buffer` are omitted from the returned string and stored in an internal buffer for the next call to `stringDecoder.write()` or `stringDecoder.end()`. + +### stringDecoder.reset([encoding]) + + +* `encoding` {string} The character encoding the `StringDecoder` will use. + Defaults to the current `encoding` of the decoder. + +Flushes all the internal data and the decoder will behave like a new object. diff --git a/lib/string_decoder.js b/lib/string_decoder.js index 891ab5bdba65f3..113c2922878cd5 100644 --- a/lib/string_decoder.js +++ b/lib/string_decoder.js @@ -41,36 +41,52 @@ function normalizeEncoding(enc) { // characters. exports.StringDecoder = StringDecoder; function StringDecoder(encoding) { + if (encoding === this.encoding && encoding !== undefined) { + this.lastNeed = 0; + this.lastTotal = 0; + this.lastChar = Buffer.allocUnsafe(this._nb || 0); + this._closed = false; + return; + } + this.encoding = normalizeEncoding(encoding); - var nb; switch (this.encoding) { case 'utf16le': this.text = utf16Text; this.end = utf16End; - nb = 4; + this._nb = 4; break; case 'utf8': this.fillLast = utf8FillLast; - nb = 4; + this._nb = 4; break; case 'base64': this.text = base64Text; this.end = base64End; - nb = 3; + this._nb = 3; break; default: this.write = simpleWrite; this.end = simpleEnd; + this._nb = 0; + this._closed = false; return; } this.lastNeed = 0; this.lastTotal = 0; - this.lastChar = Buffer.allocUnsafe(nb); + this.lastChar = Buffer.allocUnsafe(this._nb); + this._closed = false; } +StringDecoder.prototype.reset = StringDecoder; + StringDecoder.prototype.write = function(buf) { + if (this._closed === true) + this.reset(); + if (buf.length === 0) return ''; + var r; var i; if (this.lastNeed) { @@ -210,6 +226,7 @@ function utf8Text(buf, i) { // character. function utf8End(buf) { const r = (buf && buf.length ? this.write(buf) : ''); + this._closed = true; if (this.lastNeed) return r + '\ufffd'; return r; diff --git a/test/parallel/test-string-decoder-end.js b/test/parallel/test-string-decoder-end.js index 0284ee9f6c48c7..1f2914ed5ff64f 100644 --- a/test/parallel/test-string-decoder-end.js +++ b/test/parallel/test-string-decoder-end.js @@ -66,3 +66,33 @@ function testBuf(encoding, buf) { assert.strictEqual(res1, res3, 'one byte at a time should match toString'); assert.strictEqual(res2, res3, 'all bytes at once should match toString'); } + +{ + // test to check if the write after end doesn't accumulate the data + const decoder = new SD('utf8'); + const euroPart1 = Buffer.from([0xE2]); + const euroPart2 = Buffer.from([0x82, 0xAC]); + decoder.end(euroPart1); + const result = decoder.write(euroPart2); + assert.notStrictEqual(result, '€'); +} + +{ + // test to check if write after end reopens the decoder + const decoder = new SD(); + assert.strictEqual(decoder._closed, false); + decoder.end(); + assert.strictEqual(decoder._closed, true); + decoder.write(Buffer.from([0xE2])); + assert.strictEqual(decoder._closed, false); +} + +{ + // test to check if reset after end reopens the decoder + const decoder = new SD(); + assert.strictEqual(decoder._closed, false); + decoder.end(); + assert.strictEqual(decoder._closed, true); + decoder.reset(); + assert.strictEqual(decoder._closed, false); +} From dbf8fd205cf6600f4a5cc4760018b0f852d1b0f6 Mon Sep 17 00:00:00 2001 From: "Sakthipriyan Vairamani (thefourtheye)" Date: Mon, 30 Oct 2017 09:41:58 +0530 Subject: [PATCH 2/4] sort the doc section --- doc/api/string_decoder.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/doc/api/string_decoder.md b/doc/api/string_decoder.md index 748d85519b919f..1360763ac08fac 100644 --- a/doc/api/string_decoder.md +++ b/doc/api/string_decoder.md @@ -66,6 +66,16 @@ substitution characters appropriate for the character encoding. If the `buffer` argument is provided, one final call to `stringDecoder.write()` is performed before returning the remaining input. +### stringDecoder.reset([encoding]) + + +* `encoding` {string} The character encoding the `StringDecoder` will use. + Defaults to the current `encoding` of the decoder. + +Flushes all the internal data and the decoder will behave like a new object. + ### stringDecoder.write(buffer) - -* `encoding` {string} The character encoding the `StringDecoder` will use. - Defaults to the current `encoding` of the decoder. - -Flushes all the internal data and the decoder will behave like a new object. From f34b251aab6067f3310bee4f43ab3c1b25b238e0 Mon Sep 17 00:00:00 2001 From: "Sakthipriyan Vairamani (thefourtheye)" Date: Mon, 6 Nov 2017 12:09:52 +0530 Subject: [PATCH 3/4] make write hot path efficient --- lib/string_decoder.js | 29 +++++++++++++++--------- test/parallel/test-string-decoder-end.js | 20 ---------------- 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/lib/string_decoder.js b/lib/string_decoder.js index 113c2922878cd5..6ab7a9fd616f19 100644 --- a/lib/string_decoder.js +++ b/lib/string_decoder.js @@ -45,7 +45,6 @@ function StringDecoder(encoding) { this.lastNeed = 0; this.lastTotal = 0; this.lastChar = Buffer.allocUnsafe(this._nb || 0); - this._closed = false; return; } @@ -53,7 +52,6 @@ function StringDecoder(encoding) { switch (this.encoding) { case 'utf16le': this.text = utf16Text; - this.end = utf16End; this._nb = 4; break; case 'utf8': @@ -62,28 +60,21 @@ function StringDecoder(encoding) { break; case 'base64': this.text = base64Text; - this.end = base64End; this._nb = 3; break; default: this.write = simpleWrite; - this.end = simpleEnd; this._nb = 0; - this._closed = false; return; } this.lastNeed = 0; this.lastTotal = 0; this.lastChar = Buffer.allocUnsafe(this._nb); - this._closed = false; } StringDecoder.prototype.reset = StringDecoder; StringDecoder.prototype.write = function(buf) { - if (this._closed === true) - this.reset(); - if (buf.length === 0) return ''; @@ -103,7 +94,7 @@ StringDecoder.prototype.write = function(buf) { return r || ''; }; -StringDecoder.prototype.end = utf8End; +StringDecoder.prototype.end = end; // Returns only complete characters in a Buffer StringDecoder.prototype.text = utf8Text; @@ -226,7 +217,6 @@ function utf8Text(buf, i) { // character. function utf8End(buf) { const r = (buf && buf.length ? this.write(buf) : ''); - this._closed = true; if (this.lastNeed) return r + '\ufffd'; return r; @@ -299,3 +289,20 @@ function simpleWrite(buf) { function simpleEnd(buf) { return (buf && buf.length ? this.write(buf) : ''); } + +function end(buf) { + let result = ''; + + if (this.encoding === 'utf16le') + result = utf16End.call(this, buf); + else if (this.encoding === 'utf8') + result = utf8End.call(this, buf); + else if (this.encoding === 'base64') + result = base64End.call(this, buf); + else + result = simpleEnd.call(this, buf); + + this.reset(); + + return result; +} diff --git a/test/parallel/test-string-decoder-end.js b/test/parallel/test-string-decoder-end.js index 1f2914ed5ff64f..b9ccc19a6748d5 100644 --- a/test/parallel/test-string-decoder-end.js +++ b/test/parallel/test-string-decoder-end.js @@ -76,23 +76,3 @@ function testBuf(encoding, buf) { const result = decoder.write(euroPart2); assert.notStrictEqual(result, '€'); } - -{ - // test to check if write after end reopens the decoder - const decoder = new SD(); - assert.strictEqual(decoder._closed, false); - decoder.end(); - assert.strictEqual(decoder._closed, true); - decoder.write(Buffer.from([0xE2])); - assert.strictEqual(decoder._closed, false); -} - -{ - // test to check if reset after end reopens the decoder - const decoder = new SD(); - assert.strictEqual(decoder._closed, false); - decoder.end(); - assert.strictEqual(decoder._closed, true); - decoder.reset(); - assert.strictEqual(decoder._closed, false); -} From 7c2286fabeb62813b14caff1c3ddb0b9fc6fe3f4 Mon Sep 17 00:00:00 2001 From: "Sakthipriyan Vairamani (thefourtheye)" Date: Tue, 5 Dec 2017 14:07:44 +0530 Subject: [PATCH 4/4] incorporate review comments --- lib/string_decoder.js | 49 +++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/lib/string_decoder.js b/lib/string_decoder.js index 6ab7a9fd616f19..e8a29ff59cd493 100644 --- a/lib/string_decoder.js +++ b/lib/string_decoder.js @@ -215,9 +215,9 @@ function utf8Text(buf, i) { // For UTF-8, a replacement character is added when ending on a partial // character. -function utf8End(buf) { - const r = (buf && buf.length ? this.write(buf) : ''); - if (this.lastNeed) +function utf8End(ctx, buf) { + const r = (buf && buf.length ? ctx.write(buf) : ''); + if (ctx.lastNeed) return r + '\ufffd'; return r; } @@ -249,11 +249,11 @@ function utf16Text(buf, i) { // For UTF-16LE we do not explicitly append special replacement characters if we // end on a partial character, we simply let v8 handle that. -function utf16End(buf) { - const r = (buf && buf.length ? this.write(buf) : ''); - if (this.lastNeed) { - const end = this.lastTotal - this.lastNeed; - return r + this.lastChar.toString('utf16le', 0, end); +function utf16End(ctx, buf) { + const r = (buf && buf.length ? ctx.write(buf) : ''); + if (ctx.lastNeed) { + const end = ctx.lastTotal - ctx.lastNeed; + return r + ctx.lastChar.toString('utf16le', 0, end); } return r; } @@ -274,10 +274,10 @@ function base64Text(buf, i) { } -function base64End(buf) { - const r = (buf && buf.length ? this.write(buf) : ''); - if (this.lastNeed) - return r + this.lastChar.toString('base64', 0, 3 - this.lastNeed); +function base64End(ctx, buf) { + const r = (buf && buf.length ? ctx.write(buf) : ''); + if (ctx.lastNeed) + return r + ctx.lastChar.toString('base64', 0, 3 - ctx.lastNeed); return r; } @@ -286,23 +286,18 @@ function simpleWrite(buf) { return buf.toString(this.encoding); } -function simpleEnd(buf) { - return (buf && buf.length ? this.write(buf) : ''); +function simpleEnd(ctx, buf) { + return (buf && buf.length ? ctx.write(buf) : ''); } -function end(buf) { - let result = ''; - - if (this.encoding === 'utf16le') - result = utf16End.call(this, buf); - else if (this.encoding === 'utf8') - result = utf8End.call(this, buf); - else if (this.encoding === 'base64') - result = base64End.call(this, buf); - else - result = simpleEnd.call(this, buf); - - this.reset(); +const endMappings = new Map([ + ['utf16le', utf16End], + ['utf8', utf8End], + ['base64', base64End] +]); +function end(buf) { + const result = (endMappings.get(this.encoding) || simpleEnd)(this, buf); + this.reset(this.encoding); return result; }