string_decoder: reset decoder on write after end

Fixes: nodejs/node#16564. Once a StringDecoder's `end` has been called, the decoder should no longer wait for further data. If `write` is called after `end`, the decoder's internal state has to be flushed and the call treated as a brand new write request. This patch also introduces a new StringDecoder#reset method, which simply resets all the internal data.
abhishekumar-tyagi · Oct 29, 2017 · b555911 · b555911
1 parent 22882d4
commit b555911
Show file tree

Hide file tree

Showing 3 changed files with 62 additions and 5 deletions.
diff --git a/doc/api/string_decoder.md b/doc/api/string_decoder.md
@@ -82,3 +82,13 @@ Returns a decoded string, ensuring that any incomplete multibyte characters at
 the end of the `Buffer` are omitted from the returned string and stored in an
 internal buffer for the next call to `stringDecoder.write()` or
 `stringDecoder.end()`.
+
+### stringDecoder.reset([encoding])
+<!-- YAML
+added: REPLACEME
+-->
+
+* `encoding` {string} The character encoding the `StringDecoder` will use.
+  Defaults to the current `encoding` of the decoder.
+
+Flushes all internal data so that the decoder behaves like a newly created object.
diff --git a/lib/string_decoder.js b/lib/string_decoder.js
@@ -41,36 +41,52 @@ function normalizeEncoding(enc) {
 // characters.
 exports.StringDecoder = StringDecoder;
 function StringDecoder(encoding) {
+  if (encoding === this.encoding && encoding !== undefined) {
+    this.lastNeed = 0;
+    this.lastTotal = 0;
+    this.lastChar = Buffer.allocUnsafe(this._nb || 0);
+    this._closed = false;
+    return;
+  }
+
   this.encoding = normalizeEncoding(encoding);
-  var nb;
   switch (this.encoding) {
     case 'utf16le':
       this.text = utf16Text;
       this.end = utf16End;
-      nb = 4;
+      this._nb = 4;
       break;
     case 'utf8':
       this.fillLast = utf8FillLast;
-      nb = 4;
+      this._nb = 4;
       break;
     case 'base64':
       this.text = base64Text;
       this.end = base64End;
-      nb = 3;
+      this._nb = 3;
       break;
     default:
       this.write = simpleWrite;
       this.end = simpleEnd;
+      this._nb = 0;
+      this._closed = false;
       return;
   }
   this.lastNeed = 0;
   this.lastTotal = 0;
-  this.lastChar = Buffer.allocUnsafe(nb);
+  this.lastChar = Buffer.allocUnsafe(this._nb);
+  this._closed = false;
 }
 
+StringDecoder.prototype.reset = StringDecoder;
+
 StringDecoder.prototype.write = function(buf) {
+  if (this._closed === true)
+    this.reset();
+
   if (buf.length === 0)
     return '';
+
   var r;
   var i;
   if (this.lastNeed) {
@@ -210,6 +226,7 @@ function utf8Text(buf, i) {
 // character.
 function utf8End(buf) {
   const r = (buf && buf.length ? this.write(buf) : '');
+  this._closed = true;
   if (this.lastNeed)
     return r + '\ufffd';
   return r;

diff --git a/test/parallel/test-string-decoder-end.js b/test/parallel/test-string-decoder-end.js
@@ -66,3 +66,33 @@ function testBuf(encoding, buf) {
   assert.strictEqual(res1, res3, 'one byte at a time should match toString');
   assert.strictEqual(res2, res3, 'all bytes at once should match toString');
 }
+
+{
+  // test to check if the write after end doesn't accumulate the data
+  const decoder = new SD('utf8');
+  const euroPart1 = Buffer.from([0xE2]);
+  const euroPart2 = Buffer.from([0x82, 0xAC]);
+  decoder.end(euroPart1);
+  const result = decoder.write(euroPart2);
+  assert.notStrictEqual(result, '€');
+}
+
+{
+  // test to check if write after end reopens the decoder
+  const decoder = new SD();
+  assert.strictEqual(decoder._closed, false);
+  decoder.end();
+  assert.strictEqual(decoder._closed, true);
+  decoder.write(Buffer.from([0xE2]));
+  assert.strictEqual(decoder._closed, false);
+}
+
+{
+  // test to check if reset after end reopens the decoder
+  const decoder = new SD();
+  assert.strictEqual(decoder._closed, false);
+  decoder.end();
+  assert.strictEqual(decoder._closed, true);
+  decoder.reset();
+  assert.strictEqual(decoder._closed, false);
+}