Skip to content

Commit

Permalink
string_decoder: rewrite implementation
Browse files Browse the repository at this point in the history
This commit provides a rewrite of StringDecoder that improves both
performance (for non-single-byte encodings) and understandability.

Additionally, StringDecoder instantiation performance has increased
considerably due to inlinability and more efficient encoding name
checking.

PR-URL: nodejs#6777
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Anna Henningsen <[email protected]>
Reviewed-By: Ben Noordhuis <[email protected]>
  • Loading branch information
mscdex authored and Fishrock123 committed May 30, 2016
1 parent 6e98b22 commit 0c67c1b
Show file tree
Hide file tree
Showing 5 changed files with 314 additions and 208 deletions.
22 changes: 22 additions & 0 deletions benchmark/string_decoder/string-decoder-create.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
'use strict';
const common = require('../common.js');
const StringDecoder = require('string_decoder').StringDecoder;

// Benchmark configuration for StringDecoder construction.
// `encoding` holds the encoding names that main() hands to
// `new StringDecoder()`; the list includes lowercase names, hyphenated
// aliases ('utf-8'), and mixed-case spellings ('UTF-8', 'AscII', 'UTF-16LE').
// `n` is the number of decoders main() constructs per run.
// NOTE(review): presumably common.createBenchmark() invokes main() once per
// encoding/n combination -- confirm against ../common.js.
const bench = common.createBenchmark(main, {
encoding: [
'ascii', 'utf8', 'utf-8', 'base64', 'ucs2', 'UTF-8', 'AscII', 'UTF-16LE'
],
n: [25e6]
});

/**
 * Benchmark body: times the construction of StringDecoder instances.
 *
 * @param {{encoding: string, n: number}} conf - Benchmark parameters.
 *   `conf.encoding` is passed unchanged to the StringDecoder constructor and
 *   `conf.n` (truncated to a 32-bit integer) is the number of instances built.
 */
function main(conf) {
  const { encoding } = conf;
  const iterations = conf.n | 0;

  bench.start();
  for (let built = 0; built < iterations; built += 1) {
    const decoder = new StringDecoder(encoding);
    // Read a property of the new instance so the object is actually used
    // (presumably to keep the engine from discarding the construction).
    !!decoder.encoding;
  }
  bench.end(iterations);
}
72 changes: 50 additions & 22 deletions benchmark/string_decoder/string-decoder.js
Original file line number Diff line number Diff line change
@@ -1,51 +1,79 @@
'use strict';
var common = require('../common.js');
var StringDecoder = require('string_decoder').StringDecoder;
const common = require('../common.js');
const StringDecoder = require('string_decoder').StringDecoder;

var bench = common.createBenchmark(main, {
encoding: ['ascii', 'utf8', 'base64-utf8', 'base64-ascii'],
inlen: [32, 128, 1024],
const bench = common.createBenchmark(main, {
encoding: ['ascii', 'utf8', 'base64-utf8', 'base64-ascii', 'utf16le'],
inlen: [32, 128, 1024, 4096],
chunk: [16, 64, 256, 1024],
n: [25e4]
n: [25e5]
});

var UTF_ALPHA = 'Blåbærsyltetøy';
var ASC_ALPHA = 'Blueberry jam';
const UTF8_ALPHA = 'Blåbærsyltetøy';
const ASC_ALPHA = 'Blueberry jam';
const UTF16_BUF = Buffer.from('Blåbærsyltetøy', 'utf16le');

function main(conf) {
var encoding = conf.encoding;
var inLen = conf.inlen | 0;
var chunkLen = conf.chunk | 0;
var n = conf.n | 0;
const encoding = conf.encoding;
const inLen = conf.inlen | 0;
const chunkLen = conf.chunk | 0;
const n = conf.n | 0;

var alpha;
var chunks = [];
var buf;
const chunks = [];
var str = '';
var isBase64 = (encoding === 'base64-ascii' || encoding === 'base64-utf8');
const isBase64 = (encoding === 'base64-ascii' || encoding === 'base64-utf8');
var i;

if (encoding === 'ascii' || encoding === 'base64-ascii')
alpha = ASC_ALPHA;
else if (encoding === 'utf8' || encoding === 'base64-utf8')
alpha = UTF_ALPHA;
else
alpha = UTF8_ALPHA;
else if (encoding === 'utf16le') {
buf = UTF16_BUF;
str = Buffer.alloc(0);
} else
throw new Error('Bad encoding');

var sd = new StringDecoder(isBase64 ? 'base64' : encoding);
const sd = new StringDecoder(isBase64 ? 'base64' : encoding);

for (i = 0; i < inLen; ++i) {
if (i > 0 && (i % chunkLen) === 0 && !isBase64) {
chunks.push(Buffer.from(str, encoding));
str = '';
if (alpha) {
chunks.push(Buffer.from(str, encoding));
str = '';
} else {
chunks.push(str);
str = Buffer.alloc(0);
}
}
if (alpha)
str += alpha[i % alpha.length];
else {
var start = i;
var end = i + 2;
if (i % 2 !== 0) {
++start;
++end;
}
str = Buffer.concat([
str,
buf.slice(start % buf.length, end % buf.length)
]);
}
str += alpha[i % alpha.length];
}
if (str.length > 0 && !isBase64)

if (!alpha) {
if (str.length > 0)
chunks.push(str);
} else if (str.length > 0 && !isBase64)
chunks.push(Buffer.from(str, encoding));

if (isBase64) {
str = Buffer.from(str, 'utf8').toString('base64');
while (str.length > 0) {
var len = Math.min(chunkLen, str.length);
const len = Math.min(chunkLen, str.length);
chunks.push(Buffer.from(str.substring(0, len), 'utf8'));
str = str.substring(len);
}
Expand Down
Loading

0 comments on commit 0c67c1b

Please sign in to comment.