From 9a5701a40283900b80a20f0d09e0a622017b8300 Mon Sep 17 00:00:00 2001 From: Tony Gorez Date: Sun, 28 Aug 2022 21:03:08 +0200 Subject: [PATCH 1/6] test: add more cases for parse-encoding --- test/addons/parse-encoding/test.js | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/test/addons/parse-encoding/test.js b/test/addons/parse-encoding/test.js index da52f2dc4248af..6a0873f59c6d04 100644 --- a/test/addons/parse-encoding/test.js +++ b/test/addons/parse-encoding/test.js @@ -13,11 +13,27 @@ assert.strictEqual(parseEncoding('binary'), 'LATIN1'); assert.strictEqual(parseEncoding('buffer'), 'BUFFER'); assert.strictEqual(parseEncoding('hex'), 'HEX'); assert.strictEqual(parseEncoding('latin1'), 'LATIN1'); + +// ucs2 variations assert.strictEqual(parseEncoding('ucs2'), 'UCS2'); +assert.strictEqual(parseEncoding('ucs-2'), 'UCS2'); +assert.strictEqual(parseEncoding('UCS2'), 'UCS2'); +assert.strictEqual(parseEncoding('UCS-2'), 'UCS2'); + +// utf8 variations assert.strictEqual(parseEncoding('utf8'), 'UTF8'); -assert.strictEqual(parseEncoding('utf-16LE'), 'UCS2'); +assert.strictEqual(parseEncoding('utf-8'), 'UTF8'); +assert.strictEqual(parseEncoding('UTF8'), 'UTF8'); +assert.strictEqual(parseEncoding('UTF-8'), 'UTF8'); + +// utf16le variations +assert.strictEqual(parseEncoding('utf16le'), 'UCS2'); +assert.strictEqual(parseEncoding('utf-16le'), 'UCS2'); +assert.strictEqual(parseEncoding('UTF16LE'), 'UCS2'); +assert.strictEqual(parseEncoding('UTF-16LE'), 'UCS2'); + +// unknown cases assert.strictEqual(parseEncoding('utf-buffer'), 'UNKNOWN'); assert.strictEqual(parseEncoding('utf-16leNOT'), 'UNKNOWN'); - assert.strictEqual(parseEncoding('linary'), 'UNKNOWN'); assert.strictEqual(parseEncoding('luffer'), 'UNKNOWN'); From 80d4c314d2b28ac914e53b663cefb15bdc06c24b Mon Sep 17 00:00:00 2001 From: Tony Gorez Date: Mon, 29 Aug 2022 18:38:52 +0200 Subject: [PATCH 2/6] test: add missing case for LATIN1 --- test/addons/parse-encoding/test.js | 1 + 1 file changed, 1 insertion(+) diff --git a/test/addons/parse-encoding/test.js b/test/addons/parse-encoding/test.js index 6a0873f59c6d04..d350861b3421b9 100644 --- a/test/addons/parse-encoding/test.js +++ b/test/addons/parse-encoding/test.js @@ -13,6 +13,7 @@ assert.strictEqual(parseEncoding('binary'), 'LATIN1'); assert.strictEqual(parseEncoding('buffer'), 'BUFFER'); assert.strictEqual(parseEncoding('hex'), 'HEX'); assert.strictEqual(parseEncoding('latin1'), 'LATIN1'); +assert.strictEqual(parseEncoding('LATIN1'), 'LATIN1'); // ucs2 variations assert.strictEqual(parseEncoding('ucs2'), 'UCS2'); From 678f74db99faafa1dd0d4157dd01e8ec59e56816 Mon Sep 17 00:00:00 2001 From: Tony Gorez Date: Mon, 29 Aug 2022 22:51:19 +0200 Subject: [PATCH 3/6] test: add missing variation cases --- test/addons/parse-encoding/test.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/addons/parse-encoding/test.js b/test/addons/parse-encoding/test.js index d350861b3421b9..536fefc6a7c99f 100644 --- a/test/addons/parse-encoding/test.js +++ b/test/addons/parse-encoding/test.js @@ -4,14 +4,18 @@ const common = require('../../common'); const assert = require('assert'); const { parseEncoding } = require(`./build/${common.buildType}/binding`); -assert.strictEqual(parseEncoding(''), 'UNKNOWN'); - assert.strictEqual(parseEncoding('ascii'), 'ASCII'); +assert.strictEqual(parseEncoding('ASCII'), 'ASCII'); assert.strictEqual(parseEncoding('base64'), 'BASE64'); +assert.strictEqual(parseEncoding('BASE64'), 'BASE64'); assert.strictEqual(parseEncoding('base64url'), 'BASE64URL'); +assert.strictEqual(parseEncoding('BASE64URL'), 'BASE64URL'); assert.strictEqual(parseEncoding('binary'), 'LATIN1'); +assert.strictEqual(parseEncoding('BINARY'), 'LATIN1'); assert.strictEqual(parseEncoding('buffer'), 'BUFFER'); +assert.strictEqual(parseEncoding('BUFFER'), 'BUFFER'); assert.strictEqual(parseEncoding('hex'), 'HEX'); +assert.strictEqual(parseEncoding('HEX'), 'HEX'); assert.strictEqual(parseEncoding('latin1'), 'LATIN1'); assert.strictEqual(parseEncoding('LATIN1'), 'LATIN1'); @@ -34,6 +38,7 @@ assert.strictEqual(parseEncoding('UTF16LE'), 'UCS2'); assert.strictEqual(parseEncoding('UTF-16LE'), 'UCS2'); // unknown cases +assert.strictEqual(parseEncoding(''), 'UNKNOWN'); assert.strictEqual(parseEncoding('utf-buffer'), 'UNKNOWN'); assert.strictEqual(parseEncoding('utf-16leNOT'), 'UNKNOWN'); assert.strictEqual(parseEncoding('linary'), 'UNKNOWN'); From 9c00c2c1c391283fc1848f575dc895473ff51c0d Mon Sep 17 00:00:00 2001 From: Tony Gorez Date: Tue, 30 Aug 2022 07:42:44 +0200 Subject: [PATCH 4/6] test: add missing unknown cases --- test/addons/parse-encoding/test.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/addons/parse-encoding/test.js b/test/addons/parse-encoding/test.js index 536fefc6a7c99f..f30e925c36791b 100644 --- a/test/addons/parse-encoding/test.js +++ b/test/addons/parse-encoding/test.js @@ -4,6 +4,7 @@ const common = require('../../common'); const assert = require('assert'); const { parseEncoding } = require(`./build/${common.buildType}/binding`); + assert.strictEqual(parseEncoding('ascii'), 'ASCII'); assert.strictEqual(parseEncoding('ASCII'), 'ASCII'); assert.strictEqual(parseEncoding('base64'), 'BASE64'); @@ -39,6 +40,8 @@ assert.strictEqual(parseEncoding('UTF-16LE'), 'UCS2'); // unknown cases assert.strictEqual(parseEncoding(''), 'UNKNOWN'); +assert.strictEqual(parseEncoding('asCOO'), 'UNKNOWN'); +assert.strictEqual(parseEncoding('hux'), 'UNKNOWN'); assert.strictEqual(parseEncoding('utf-buffer'), 'UNKNOWN'); assert.strictEqual(parseEncoding('utf-16leNOT'), 'UNKNOWN'); assert.strictEqual(parseEncoding('linary'), 'UNKNOWN'); From d508d0359c95651f3d8e8015eb846555438d5c5d Mon Sep 17 00:00:00 2001 From: Tony Gorez Date: Tue, 30 Aug 2022 19:01:09 +0200 Subject: [PATCH 5/6] test: update doc in code --- src/api/encoding.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/api/encoding.cc b/src/api/encoding.cc index 68278ff7371d80..ffbabc3b44b04a 100644 --- a/src/api/encoding.cc +++ b/src/api/encoding.cc @@ -15,7 +15,11 @@ enum encoding ParseEncoding(const char* encoding, switch (encoding[0]) { case 'u': case 'U': - // utf8, utf16le + // Note: the two first conditions are needed for performance reasons + // as "utf8"/"utf-8" is a common case. + // (same for other cases below) + + // utf, utf16le if (encoding[1] == 't' && encoding[2] == 'f') { // Skip `-` const size_t skip = encoding[3] == '-' ? 4 : 3; From 44fb62021efe440516b457cfca37314b0c27013d Mon Sep 17 00:00:00 2001 From: Tony Gorez Date: Thu, 1 Sep 2022 07:09:58 +0200 Subject: [PATCH 6/6] doc: typo Co-authored-by: Luigi Pinca --- src/api/encoding.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/encoding.cc b/src/api/encoding.cc index ffbabc3b44b04a..3ccfd6c84b7865 100644 --- a/src/api/encoding.cc +++ b/src/api/encoding.cc @@ -19,7 +19,7 @@ enum encoding ParseEncoding(const char* encoding, // as "utf8"/"utf-8" is a common case. // (same for other cases below) - // utf, utf16le + // utf8, utf16le if (encoding[1] == 't' && encoding[2] == 'f') { // Skip `-` const size_t skip = encoding[3] == '-' ? 4 : 3;