From b5559118bba442551759b072fb14ac41c993481f Mon Sep 17 00:00:00 2001
From: "Sakthipriyan Vairamani (thefourtheye)" <thechargingvolcano@gmail.com>
Date: Sun, 29 Oct 2017 23:18:25 +0530
Subject: [PATCH 1/4] string_decoder: make write after end to reset

Fixes: https://github.com/nodejs/node/issues/16564

When StringDecoder's `end` is called, it is no longer supposed to wait
for the data. If a `write` call is made after `end`, then the decoder
has to be flushed and treated as a brand new write request.

This patch also introduces a new StringDecoder#reset method, which
simply resets all the internal data.
---
 doc/api/string_decoder.md                | 10 ++++++++
 lib/string_decoder.js                    | 27 +++++++++++++++++----
 test/parallel/test-string-decoder-end.js | 30 ++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/doc/api/string_decoder.md b/doc/api/string_decoder.md
index cde81e6ae5ec2b..748d85519b919f 100644
--- a/doc/api/string_decoder.md
+++ b/doc/api/string_decoder.md
@@ -82,3 +82,13 @@ Returns a decoded string, ensuring that any incomplete multibyte characters at
 the end of the `Buffer` are omitted from the returned string and stored in an
 internal buffer for the next call to `stringDecoder.write()` or
 `stringDecoder.end()`.
+
+### stringDecoder.reset([encoding])
+<!-- YAML
+added: REPLACEME
+-->
+
+* `encoding` {string} The character encoding the `StringDecoder` will use.
+  Defaults to the current `encoding` of the decoder.
+
+Flushes all the internal data and the decoder will behave like a new object.
diff --git a/lib/string_decoder.js b/lib/string_decoder.js
index 891ab5bdba65f3..113c2922878cd5 100644
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@@ -41,36 +41,52 @@ function normalizeEncoding(enc) {
 // characters.
 exports.StringDecoder = StringDecoder;
 function StringDecoder(encoding) {
+  if (encoding === this.encoding && encoding !== undefined) {
+    this.lastNeed = 0;
+    this.lastTotal = 0;
+    this.lastChar = Buffer.allocUnsafe(this._nb || 0);
+    this._closed = false;
+    return;
+  }
+
   this.encoding = normalizeEncoding(encoding);
-  var nb;
   switch (this.encoding) {
     case 'utf16le':
       this.text = utf16Text;
       this.end = utf16End;
-      nb = 4;
+      this._nb = 4;
       break;
     case 'utf8':
       this.fillLast = utf8FillLast;
-      nb = 4;
+      this._nb = 4;
       break;
     case 'base64':
       this.text = base64Text;
       this.end = base64End;
-      nb = 3;
+      this._nb = 3;
       break;
     default:
       this.write = simpleWrite;
       this.end = simpleEnd;
+      this._nb = 0;
+      this._closed = false;
       return;
   }
   this.lastNeed = 0;
   this.lastTotal = 0;
-  this.lastChar = Buffer.allocUnsafe(nb);
+  this.lastChar = Buffer.allocUnsafe(this._nb);
+  this._closed = false;
 }
 
+StringDecoder.prototype.reset = StringDecoder;
+
 StringDecoder.prototype.write = function(buf) {
+  if (this._closed === true)
+    this.reset();
+
   if (buf.length === 0)
     return '';
+
   var r;
   var i;
   if (this.lastNeed) {
@@ -210,6 +226,7 @@ function utf8Text(buf, i) {
 // character.
 function utf8End(buf) {
   const r = (buf && buf.length ? this.write(buf) : '');
+  this._closed = true;
   if (this.lastNeed)
     return r + '\ufffd';
   return r;
diff --git a/test/parallel/test-string-decoder-end.js b/test/parallel/test-string-decoder-end.js
index 0284ee9f6c48c7..1f2914ed5ff64f 100644
--- a/test/parallel/test-string-decoder-end.js
+++ b/test/parallel/test-string-decoder-end.js
@@ -66,3 +66,33 @@ function testBuf(encoding, buf) {
   assert.strictEqual(res1, res3, 'one byte at a time should match toString');
   assert.strictEqual(res2, res3, 'all bytes at once should match toString');
 }
+
+{
+  // test to check if the write after end doesn't accumulate the data
+  const decoder = new SD('utf8');
+  const euroPart1 = Buffer.from([0xE2]);
+  const euroPart2 = Buffer.from([0x82, 0xAC]);
+  decoder.end(euroPart1);
+  const result = decoder.write(euroPart2);
+  assert.notStrictEqual(result, '€');
+}
+
+{
+  // test to check if write after end reopens the decoder
+  const decoder = new SD();
+  assert.strictEqual(decoder._closed, false);
+  decoder.end();
+  assert.strictEqual(decoder._closed, true);
+  decoder.write(Buffer.from([0xE2]));
+  assert.strictEqual(decoder._closed, false);
+}
+
+{
+  // test to check if reset after end reopens the decoder
+  const decoder = new SD();
+  assert.strictEqual(decoder._closed, false);
+  decoder.end();
+  assert.strictEqual(decoder._closed, true);
+  decoder.reset();
+  assert.strictEqual(decoder._closed, false);
+}

From dbf8fd205cf6600f4a5cc4760018b0f852d1b0f6 Mon Sep 17 00:00:00 2001
From: "Sakthipriyan Vairamani (thefourtheye)" <thechargingvolcano@gmail.com>
Date: Mon, 30 Oct 2017 09:41:58 +0530
Subject: [PATCH 2/4] sort the doc section

---
 doc/api/string_decoder.md | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/doc/api/string_decoder.md b/doc/api/string_decoder.md
index 748d85519b919f..1360763ac08fac 100644
--- a/doc/api/string_decoder.md
+++ b/doc/api/string_decoder.md
@@ -66,6 +66,16 @@ substitution characters appropriate for the character encoding.
 If the `buffer` argument is provided, one final call to `stringDecoder.write()`
 is performed before returning the remaining input.
 
+### stringDecoder.reset([encoding])
+<!-- YAML
+added: REPLACEME
+-->
+
+* `encoding` {string} The character encoding the `StringDecoder` will use.
+  Defaults to the current `encoding` of the decoder.
+
+Flushes all the internal data and the decoder will behave like a new object.
+
 ### stringDecoder.write(buffer)
 <!-- YAML
 added: v0.1.99
@@ -83,12 +93,3 @@ the end of the `Buffer` are omitted from the returned string and stored in an
 internal buffer for the next call to `stringDecoder.write()` or
 `stringDecoder.end()`.
 
-### stringDecoder.reset([encoding])
-<!-- YAML
-added: REPLACEME
--->
-
-* `encoding` {string} The character encoding the `StringDecoder` will use.
-  Defaults to the current `encoding` of the decoder.
-
-Flushes all the internal data and the decoder will behave like a new object.

From f34b251aab6067f3310bee4f43ab3c1b25b238e0 Mon Sep 17 00:00:00 2001
From: "Sakthipriyan Vairamani (thefourtheye)" <thechargingvolcano@gmail.com>
Date: Mon, 6 Nov 2017 12:09:52 +0530
Subject: [PATCH 3/4] make write hot path efficient

---
 lib/string_decoder.js                    | 29 +++++++++++++++---------
 test/parallel/test-string-decoder-end.js | 20 ----------------
 2 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/lib/string_decoder.js b/lib/string_decoder.js
index 113c2922878cd5..6ab7a9fd616f19 100644
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@@ -45,7 +45,6 @@ function StringDecoder(encoding) {
     this.lastNeed = 0;
     this.lastTotal = 0;
     this.lastChar = Buffer.allocUnsafe(this._nb || 0);
-    this._closed = false;
     return;
   }
 
@@ -53,7 +52,6 @@ function StringDecoder(encoding) {
   switch (this.encoding) {
     case 'utf16le':
       this.text = utf16Text;
-      this.end = utf16End;
       this._nb = 4;
       break;
     case 'utf8':
@@ -62,28 +60,21 @@ function StringDecoder(encoding) {
       break;
     case 'base64':
       this.text = base64Text;
-      this.end = base64End;
       this._nb = 3;
       break;
     default:
       this.write = simpleWrite;
-      this.end = simpleEnd;
       this._nb = 0;
-      this._closed = false;
       return;
   }
   this.lastNeed = 0;
   this.lastTotal = 0;
   this.lastChar = Buffer.allocUnsafe(this._nb);
-  this._closed = false;
 }
 
 StringDecoder.prototype.reset = StringDecoder;
 
 StringDecoder.prototype.write = function(buf) {
-  if (this._closed === true)
-    this.reset();
-
   if (buf.length === 0)
     return '';
 
@@ -103,7 +94,7 @@ StringDecoder.prototype.write = function(buf) {
   return r || '';
 };
 
-StringDecoder.prototype.end = utf8End;
+StringDecoder.prototype.end = end;
 
 // Returns only complete characters in a Buffer
 StringDecoder.prototype.text = utf8Text;
@@ -226,7 +217,6 @@ function utf8Text(buf, i) {
 // character.
 function utf8End(buf) {
   const r = (buf && buf.length ? this.write(buf) : '');
-  this._closed = true;
   if (this.lastNeed)
     return r + '\ufffd';
   return r;
@@ -299,3 +289,20 @@ function simpleWrite(buf) {
 function simpleEnd(buf) {
   return (buf && buf.length ? this.write(buf) : '');
 }
+
+function end(buf) {
+  let result = '';
+
+  if (this.encoding === 'utf16le')
+    result = utf16End.call(this, buf);
+  else if (this.encoding === 'utf8')
+    result = utf8End.call(this, buf);
+  else if (this.encoding === 'base64')
+    result = base64End.call(this, buf);
+  else
+    result = simpleEnd.call(this, buf);
+
+  this.reset();
+
+  return result;
+}
diff --git a/test/parallel/test-string-decoder-end.js b/test/parallel/test-string-decoder-end.js
index 1f2914ed5ff64f..b9ccc19a6748d5 100644
--- a/test/parallel/test-string-decoder-end.js
+++ b/test/parallel/test-string-decoder-end.js
@@ -76,23 +76,3 @@ function testBuf(encoding, buf) {
   const result = decoder.write(euroPart2);
   assert.notStrictEqual(result, '€');
 }
-
-{
-  // test to check if write after end reopens the decoder
-  const decoder = new SD();
-  assert.strictEqual(decoder._closed, false);
-  decoder.end();
-  assert.strictEqual(decoder._closed, true);
-  decoder.write(Buffer.from([0xE2]));
-  assert.strictEqual(decoder._closed, false);
-}
-
-{
-  // test to check if reset after end reopens the decoder
-  const decoder = new SD();
-  assert.strictEqual(decoder._closed, false);
-  decoder.end();
-  assert.strictEqual(decoder._closed, true);
-  decoder.reset();
-  assert.strictEqual(decoder._closed, false);
-}

From 7c2286fabeb62813b14caff1c3ddb0b9fc6fe3f4 Mon Sep 17 00:00:00 2001
From: "Sakthipriyan Vairamani (thefourtheye)" <thechargingvolcano@gmail.com>
Date: Tue, 5 Dec 2017 14:07:44 +0530
Subject: [PATCH 4/4] incorporate review comments

---
 lib/string_decoder.js | 49 +++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/lib/string_decoder.js b/lib/string_decoder.js
index 6ab7a9fd616f19..e8a29ff59cd493 100644
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@@ -215,9 +215,9 @@ function utf8Text(buf, i) {
 
 // For UTF-8, a replacement character is added when ending on a partial
 // character.
-function utf8End(buf) {
-  const r = (buf && buf.length ? this.write(buf) : '');
-  if (this.lastNeed)
+function utf8End(ctx, buf) {
+  const r = (buf && buf.length ? ctx.write(buf) : '');
+  if (ctx.lastNeed)
     return r + '\ufffd';
   return r;
 }
@@ -249,11 +249,11 @@ function utf16Text(buf, i) {
 
 // For UTF-16LE we do not explicitly append special replacement characters if we
 // end on a partial character, we simply let v8 handle that.
-function utf16End(buf) {
-  const r = (buf && buf.length ? this.write(buf) : '');
-  if (this.lastNeed) {
-    const end = this.lastTotal - this.lastNeed;
-    return r + this.lastChar.toString('utf16le', 0, end);
+function utf16End(ctx, buf) {
+  const r = (buf && buf.length ? ctx.write(buf) : '');
+  if (ctx.lastNeed) {
+    const end = ctx.lastTotal - ctx.lastNeed;
+    return r + ctx.lastChar.toString('utf16le', 0, end);
   }
   return r;
 }
@@ -274,10 +274,10 @@ function base64Text(buf, i) {
 }
 
 
-function base64End(buf) {
-  const r = (buf && buf.length ? this.write(buf) : '');
-  if (this.lastNeed)
-    return r + this.lastChar.toString('base64', 0, 3 - this.lastNeed);
+function base64End(ctx, buf) {
+  const r = (buf && buf.length ? ctx.write(buf) : '');
+  if (ctx.lastNeed)
+    return r + ctx.lastChar.toString('base64', 0, 3 - ctx.lastNeed);
   return r;
 }
 
@@ -286,23 +286,18 @@ function simpleWrite(buf) {
   return buf.toString(this.encoding);
 }
 
-function simpleEnd(buf) {
-  return (buf && buf.length ? this.write(buf) : '');
+function simpleEnd(ctx, buf) {
+  return (buf && buf.length ? ctx.write(buf) : '');
 }
 
-function end(buf) {
-  let result = '';
-
-  if (this.encoding === 'utf16le')
-    result = utf16End.call(this, buf);
-  else if (this.encoding === 'utf8')
-    result = utf8End.call(this, buf);
-  else if (this.encoding === 'base64')
-    result = base64End.call(this, buf);
-  else
-    result = simpleEnd.call(this, buf);
-
-  this.reset();
+const endMappings = new Map([
+  ['utf16le', utf16End],
+  ['utf8', utf8End],
+  ['base64', base64End]
+]);
 
+function end(buf) {
+  const result = (endMappings.get(this.encoding) || simpleEnd)(this, buf);
+  this.reset(this.encoding);
   return result;
 }