Skip to content

Commit

Permalink
string_decoder: make write after end to reset
Browse files Browse the repository at this point in the history
Fixes: nodejs/node#16564

Once StringDecoder's `end` has been called, the decoder must no longer wait
for the rest of an incomplete character. If `write` is called after `end`,
the decoder's internal state has to be discarded and the call treated as a
brand new write request.

This patch also introduces a new StringDecoder#reset method, which
simply resets all the internal data.
  • Loading branch information
thefourtheye committed Oct 29, 2017
1 parent 22882d4 commit b555911
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 5 deletions.
10 changes: 10 additions & 0 deletions doc/api/string_decoder.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,13 @@ Returns a decoded string, ensuring that any incomplete multibyte characters at
the end of the `Buffer` are omitted from the returned string and stored in an
internal buffer for the next call to `stringDecoder.write()` or
`stringDecoder.end()`.

### stringDecoder.reset([encoding])
<!-- YAML
added: REPLACEME
-->

* `encoding` {string} The character encoding the `StringDecoder` will use.
Defaults to the current `encoding` of the decoder.

Discards all internal state, so that the decoder behaves like a newly created
object.
27 changes: 22 additions & 5 deletions lib/string_decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,36 +41,52 @@ function normalizeEncoding(enc) {
// characters.
exports.StringDecoder = StringDecoder;
/**
 * Initializes a decoder for `encoding`. The same function is also installed
 * as `StringDecoder.prototype.reset`, in which case it re-initializes an
 * already constructed decoder in place.
 *
 * @param {string} [encoding] Character encoding to decode. When omitted on an
 *   already initialized decoder (i.e. when invoked as `reset()`), the current
 *   encoding is kept. When omitted on a new object, the value is whatever
 *   `normalizeEncoding(undefined)` yields.
 */
function StringDecoder(encoding) {
  // `this.encoding` is only assigned by this function, so a defined value
  // means we are being re-run on a live decoder rather than constructing one.
  const initialized = this.encoding !== undefined;

  // Fast path: same encoding (or none given) on a live decoder. The
  // per-encoding methods are already in place, so only the buffered state
  // has to be cleared.
  if (initialized && (encoding === undefined || encoding === this.encoding)) {
    this.lastNeed = 0;
    this.lastTotal = 0;
    this.lastChar = Buffer.allocUnsafe(this._nb || 0);
    this._closed = false;
    return;
  }

  if (initialized) {
    // Switching encodings: drop the own-property overrides installed for the
    // previous encoding so that the prototype's methods are visible again,
    // exactly as they are on a freshly constructed object.
    delete this.text;
    delete this.end;
    delete this.fillLast;
    delete this.write;
  }

  this.encoding = normalizeEncoding(encoding);
  switch (this.encoding) {
    case 'utf16le':
      this.text = utf16Text;
      this.end = utf16End;
      this._nb = 4;
      break;
    case 'utf8':
      this.fillLast = utf8FillLast;
      this._nb = 4;
      break;
    case 'base64':
      this.text = base64Text;
      this.end = base64End;
      this._nb = 3;
      break;
    default:
      // Remaining encodings are handled by the simple write/end pair and
      // get no `lastChar` buffer.
      this.write = simpleWrite;
      this.end = simpleEnd;
      this._nb = 0;
      this._closed = false;
      return;
  }
  this.lastNeed = 0;
  this.lastTotal = 0;
  // `_nb` is remembered on the instance so the fast path above can allocate
  // a buffer of the right size without re-running the switch.
  this.lastChar = Buffer.allocUnsafe(this._nb);
  this._closed = false;
}

// `reset()` is the constructor function itself: calling it on an existing
// decoder re-runs the initialization logic against that instance in place.
StringDecoder.prototype.reset = StringDecoder;

StringDecoder.prototype.write = function(buf) {
if (this._closed === true)
this.reset();

if (buf.length === 0)
return '';

var r;
var i;
if (this.lastNeed) {
Expand Down Expand Up @@ -210,6 +226,7 @@ function utf8Text(buf, i) {
// character.
function utf8End(buf) {
  // Decode any final chunk first; an absent or empty buffer yields nothing.
  let decoded = '';
  if (buf && buf.length)
    decoded = this.write(buf);

  // Mark the decoder as ended so that a later write() starts from scratch.
  this._closed = true;

  // A still-pending partial character is replaced with U+FFFD.
  return this.lastNeed ? decoded + '\ufffd' : decoded;
}
Expand Down
30 changes: 30 additions & 0 deletions test/parallel/test-string-decoder-end.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,33 @@ function testBuf(encoding, buf) {
assert.strictEqual(res1, res3, 'one byte at a time should match toString');
assert.strictEqual(res2, res3, 'all bytes at once should match toString');
}

{
  // A write() issued after end() must not be combined with bytes that were
  // pending when end() was called: the two halves of the euro sign, split
  // across end() and write(), must not decode to a euro sign.
  const sd = new SD('utf8');
  const leadByte = Buffer.from([0xE2]);
  const trailingBytes = Buffer.from([0x82, 0xAC]);
  sd.end(leadByte);
  assert.notStrictEqual(sd.write(trailingBytes), '€');
}

{
  // end() marks the decoder as closed; a subsequent write() reopens it.
  const sd = new SD();
  assert.strictEqual(sd._closed, false);

  sd.end();
  assert.strictEqual(sd._closed, true);

  sd.write(Buffer.from([0xE2]));
  assert.strictEqual(sd._closed, false);
}

{
  // end() marks the decoder as closed; an explicit reset() reopens it.
  const sd = new SD();
  assert.strictEqual(sd._closed, false);

  sd.end();
  assert.strictEqual(sd._closed, true);

  sd.reset();
  assert.strictEqual(sd._closed, false);
}

0 comments on commit b555911

Please sign in to comment.