Skip to content

Commit

Permalink
lib: improve normalize encoding performance
Browse files Browse the repository at this point in the history
This focuses on the common case by making sure they are prioritized.
It also changes some typeof checks to test for undefined since
that is faster and it adds a benchmark.
  • Loading branch information
BridgeAR committed Feb 22, 2018
1 parent acab8bd commit f8249bb
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 33 deletions.
43 changes: 43 additions & 0 deletions benchmark/buffers/buffer-normalize-encoding.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
'use strict';

const common = require('../common.js');

const bench = common.createBenchmark(main, {
encoding: [
'ascii',
'ASCII',
'base64',
'BASE64',
'binary',
'BINARY',
'hex',
'HEX',
'latin1',
'LATIN1',
'ucs-2',
'UCS-2',
'ucs2',
'UCS2',
'utf-16le',
'UTF-16LE',
'utf-8',
'UTF-8',
'utf16le',
'UTF16LE',
'utf8',
'UTF8'
],
n: [1e6]
}, {
flags: ['--expose-internals']
});

function main({ encoding, n }) {
const { normalizeEncoding } = require('internal/util');

bench.start();
for (var i = 0; i < n; i++) {
normalizeEncoding(encoding);
}
bench.end(n);
}
4 changes: 2 additions & 2 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ function assertSize(size) {
err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
}

if (err) {
if (err !== null) {
Error.captureStackTrace(err, assertSize);
throw err;
}
Expand Down Expand Up @@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {

Buffer.isEncoding = function isEncoding(encoding) {
return typeof encoding === 'string' &&
typeof normalizeEncoding(encoding) === 'string';
normalizeEncoding(encoding) !== undefined;
};
Buffer[kIsEncodingSymbol] = Buffer.isEncoding;

Expand Down
77 changes: 50 additions & 27 deletions lib/internal/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,36 +96,59 @@ function assertCrypto() {
throw new errors.Error('ERR_NO_CRYPTO');
}

// The loop should only run at most twice, retrying with lowercased enc
// if there is no match in the first pass.
// We use a loop instead of branching to retry with a helper
// function in order to avoid the performance hit.
// Return undefined if there is no match.
// Move the "slow cases" to a separate function to make sure this function gets
// inlined properly. That prioritizes the common case.
function normalizeEncoding(enc) {
if (enc == null || enc === '') return 'utf8';
let retried;
while (true) {
switch (enc) {
case 'utf8':
case 'utf-8':
return 'utf8';
case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
return slowCases(enc);
}

function slowCases(enc) {
switch (enc.length) {
case 4:
if (enc === 'UTF8') return 'utf8';
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
enc = `${enc}`.toLowerCase();
if (enc === 'utf8') return 'utf8';
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
break;
case 3:
if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
return 'hex';
break;
case 5:
if (enc === 'ascii') return 'ascii';
if (enc === 'ucs-2') return 'utf16le';
if (enc === 'UTF-8') return 'utf8';
if (enc === 'ASCII') return 'ascii';
if (enc === 'UCS-2') return 'utf16le';
enc = `${enc}`.toLowerCase();
if (enc === 'utf-8') return 'utf8';
if (enc === 'ascii') return 'ascii';
if (enc === 'usc-2') return 'utf16le';
break;
case 6:
if (enc === 'base64') return 'base64';
if (enc === 'latin1' || enc === 'binary') return 'latin1';
if (enc === 'BASE64') return 'base64';
if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
enc = `${enc}`.toLowerCase();
if (enc === 'base64') return 'base64';
if (enc === 'latin1' || enc === 'binary') return 'latin1';
break;
case 7:
if (enc === 'utf16le' || enc === 'UTF16LE' ||
`${enc}`.toLowerCase() === 'utf16le')
return 'utf16le';
case 'latin1':
case 'binary':
return 'latin1';
case 'base64':
case 'ascii':
case 'hex':
return enc;
default:
if (retried) return; // undefined
enc = ('' + enc).toLowerCase();
retried = true;
}
break;
case 8:
if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
`${enc}`.toLowerCase() === 'utf-16le')
return 'utf16le';
break;
default:
if (enc === '') return 'utf8';
}
}

Expand Down
10 changes: 6 additions & 4 deletions lib/string_decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
// modules monkey-patch it to support additional encodings
function normalizeEncoding(enc) {
const nenc = internalUtil.normalizeEncoding(enc);
if (typeof nenc !== 'string' &&
(Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
return nenc || enc;
if (nenc === undefined) {
if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
return enc;
}
return nenc;
}

const encodingsMap = {};
Expand Down

0 comments on commit f8249bb

Please sign in to comment.