From f91efb14ebbde5c5855de158e62055d72a82b4d4 Mon Sep 17 00:00:00 2001
From: yosion-p
Date: Mon, 16 Aug 2021 17:25:08 +0800
Subject: [PATCH 1/6] fix: check the chunk boundaries for surrogate pairs

---
 test/streams-test.js | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/test/streams-test.js b/test/streams-test.js
index d4054de2..6c410d51 100644
--- a/test/streams-test.js
+++ b/test/streams-test.js
@@ -8,13 +8,25 @@ if (!iconv.supportsStreams)
 
 var Readable = require('stream').Readable;
 
+// If the two chunks form a surrogate pair, merge them (both bounds are inclusive).
+function formatSurrogate(arr = []) {
+    let h = arr[0],
+        l = arr[1];
+    if ('\uD800' <= h && h <= '\uDBFF' && '\uDC00' <= l && l <= '\uDFFF') {
+        return [h + l];
+    }
+    return arr;
+}
+
 // Create a source stream that feeds given array of chunks.
 function feeder(chunks) {
     if (!Array.isArray(chunks))
         chunks = [chunks];
     var opts = {};
-    if (chunks.every(function(chunk) {return typeof chunk == 'string'}))
-        opts.encoding = 'utf8';
+    if (chunks.every(function(chunk) {return typeof chunk == 'string'})) {
+        opts.encoding = 'utf8';
+        chunks = formatSurrogate(chunks);
+    }
 
     var stream = new Readable(opts);
     function writeChunk() {
@@ -135,6 +147,12 @@ function checkDecodeStream(opts) {
 }
 
 describe("Streaming mode", function() {
+    it.only("Encoding using internal modules: utf8 with surrogates in separate chunks", checkEncodeStream({
+        encoding: "utf8",
+        input: ["\uD83D", "\uDE3B"],
+        output: "f09f98bb",
+    }));
+
     it("Feeder outputs strings", checkStreamOutput({
         createStream: function() { return feeder(["abc", "def"]); },
         outputType: 'string',

From f50aeb8fad97f7f151ec44651411e810d9092587 Mon Sep 17 00:00:00 2001
From: yosion-p
Date: Tue, 31 Aug 2021 16:13:16 +0800
Subject: [PATCH 2/6] fix: remove the stray it.only

---
 test/streams-test.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/streams-test.js b/test/streams-test.js
index 6c410d51..70f98f84 100644
--- a/test/streams-test.js
+++ b/test/streams-test.js
@@ -147,7 +147,7 @@ function checkDecodeStream(opts) {
 }
 
 describe("Streaming mode", function() {
-    it.only("Encoding using internal modules: utf8 with surrogates in separate chunks", checkEncodeStream({
+    it("Encoding using internal modules: utf8 with surrogates in separate chunks", checkEncodeStream({
         encoding: "utf8",
         input: ["\uD83D", "\uDE3B"],
         output: "f09f98bb",

From 033fe471cf65d03a665c01204aaa0b1cb5f23749 Mon Sep 17 00:00:00 2001
From: yosion-p
Date: Mon, 6 Sep 2021 17:46:30 +0800
Subject: [PATCH 3/6] Upgrade: use ES6 modules instead of CommonJS

---
 encodings/dbcs-codec.js          |  6 ++-
 encodings/dbcs-data.js           | 26 +++++++-----
 encodings/index.js               | 41 +++++++++++--------
 encodings/internal.js            |  8 ++--
 encodings/sbcs-codec.js          |  7 +++-
 encodings/sbcs-data-generated.js |  2 +-
 encodings/sbcs-data.js           |  2 +-
 encodings/utf16.js               |  9 ++--
 encodings/utf32.js               | 19 +++++----
 encodings/utf7.js                | 11 ++---
 lib/bom-handling.js              |  5 ++-
 lib/index.js                     | 19 +++++----
 lib/streams.js                   |  5 ++-
 package.json                     |  7 ++--
 test/big5-test.js                | 10 +++--
 test/bom-test.js                 | 11 ++---
 test/cesu8-test.js               |  7 ++--
 test/cyrillic-test.js            |  7 ++--
 test/dbcs-test.js                | 10 ++---
 test/gbk-test.js                 | 18 ++++----
 test/greek-test.js               |  7 ++--
 test/main-test.js                |  9 ++--
 test/performance.js              | 70 --------------------------------
 test/performance/index.js        | 68 +++++++++++++++++++++++++++++++
 test/sbcs-test.js                | 11 ++---
 test/shiftjis-test.js            |  7 ++--
 test/streams-test.js             | 39 +++++------------
 test/turkish-test.js             |  7 ++--
 test/utf16-test.js               |  9 ++--
 test/utf32-test.js               |  9 ++--
 test/utf7-test.js                |  7 ++--
 31 files changed, 248 insertions(+), 225 deletions(-)
 delete mode 100644 test/performance.js
 create mode 100644 test/performance/index.js

diff --git a/encodings/dbcs-codec.js b/encodings/dbcs-codec.js
index fa839170..e765fe5c 100644
--- a/encodings/dbcs-codec.js
+++ b/encodings/dbcs-codec.js
@@ -1,11 +1,12 @@
 "use strict";
-var Buffer = require("safer-buffer").Buffer;
+import pkg from 'safer-buffer'
+const { Buffer } = pkg
 
 // Multibyte codec. In this scheme, a character is represented by 1 or more bytes.
 // Our codec supports UTF-16 surrogates, extensions for GB18030 and unicode sequences.
 // To save memory and loading time, we read table files only when requested.
 
-exports._dbcs = DBCSCodec;
+const _dbcs = DBCSCodec;
 
 var UNASSIGNED = -1,
     GB18030_CODE = -2,
@@ -595,3 +596,4 @@ function findIdx(table, val) {
     return l;
 }
 
+export default { _dbcs }

diff --git a/encodings/dbcs-data.js b/encodings/dbcs-data.js
index 0d17e582..990864cd 100644
--- a/encodings/dbcs-data.js
+++ b/encodings/dbcs-data.js
@@ -1,11 +1,12 @@
 "use strict";
+import fs from 'fs';
 
 // Description of supported double byte encodings and aliases.
 // Tables are not require()-d until they are needed to speed up library load.
 // require()-s are direct to support Browserify.
-module.exports = {
-
+export default {
+
     // == Japanese/ShiftJIS ====================================================
     // All japanese encodings are based on JIS X set of standards:
     // JIS X 0201 - Single-byte encoding of ASCII + ¥ + Kana chars at 0xA1-0xDF.
     // JIS X 0208 - Main set of 6879 chars, placed in 94x94 plane, to be encoded by 2 bytes.
@@ -40,7 +41,7 @@ module.exports = {
 
     'shiftjis': {
         type: '_dbcs',
-        table: function() { return require('./tables/shiftjis.json') },
+        table: () => JSON.parse(fs.readFileSync('./encodings/tables/shiftjis.json', 'utf8')),
         encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
         encodeSkipVals: [{from: 0xED40, to: 0xF940}],
     },
@@ -57,7 +58,7 @@ module.exports = {
 
     'eucjp': {
         type: '_dbcs',
-        table: function() { return require('./tables/eucjp.json') },
+        table: () => JSON.parse(fs.readFileSync('./encodings/tables/eucjp.json', 'utf8')),
         encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
     },
@@ -84,13 +85,14 @@ module.exports = {
     '936': 'cp936',
     'cp936': {
         type: '_dbcs',
-        table: function() { return require('./tables/cp936.json') },
+        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp936.json', 'utf8'))
     },
 
     // GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some other.
     'gbk': {
         type: '_dbcs',
-        table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
+        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp936.json', 'utf8'))
+            .concat(JSON.parse(fs.readFileSync('./encodings/tables/gbk-added.json', 'utf8')))
     },
     'xgbk': 'gbk',
     'isoir58': 'gbk',
@@ -102,8 +104,9 @@ module.exports = {
     // http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
     'gb18030': {
         type: '_dbcs',
-        table: function() { return require('./tables/cp936.json').concat(require('./tables/gbk-added.json')) },
-        gb18030: function() { return require('./tables/gb18030-ranges.json') },
+        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp936.json', 'utf8'))
+            .concat(JSON.parse(fs.readFileSync('./encodings/tables/gbk-added.json', 'utf8'))),
+        gb18030: () => JSON.parse(fs.readFileSync('./encodings/tables/gb18030-ranges.json', 'utf8')),
         encodeSkipVals: [0x80],
         encodeAdd: {'€': 0xA2E3},
     },
@@ -118,7 +121,7 @@ module.exports = {
     '949': 'cp949',
     'cp949': {
         type: '_dbcs',
-        table: function() { return require('./tables/cp949.json') },
+        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp949.json', 'utf8'))
     },
     'cseuckr': 'cp949',
@@ -159,14 +162,15 @@ module.exports = {
     '950': 'cp950',
     'cp950': {
         type: '_dbcs',
-        table: function() { return require('./tables/cp950.json') },
+        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp950.json', 'utf8')),
     },
 
     // Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
     'big5': 'big5hkscs',
     'big5hkscs': {
         type: '_dbcs',
-        table: function() { return require('./tables/cp950.json').concat(require('./tables/big5-added.json')) },
+        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp950.json', 'utf8'))
+            .concat(JSON.parse(fs.readFileSync('./encodings/tables/big5-added.json', 'utf8'))),
         encodeSkipVals: [
             // Although Encoding Standard says we should avoid encoding to HKSCS area (See Step 1 of
             // https://encoding.spec.whatwg.org/#index-big5-pointer), we still do it to increase compatibility with ICU.

diff --git a/encodings/index.js b/encodings/index.js
index d95c2441..bf1135c9 100644
--- a/encodings/index.js
+++ b/encodings/index.js
@@ -2,22 +2,29 @@
 
 // Update this array if you add/rename/remove files in this directory.
 // We support Browserify by skipping automatic module discovery and requiring modules directly.
-var modules = [
-    require("./internal"),
-    require("./utf32"),
-    require("./utf16"),
-    require("./utf7"),
-    require("./sbcs-codec"),
-    require("./sbcs-data"),
-    require("./sbcs-data-generated"),
-    require("./dbcs-codec"),
-    require("./dbcs-data"),
-];
+const encodingList = {}
+await Promise.all([
+    import("./internal.js"),
+    import("./utf32.js"),
+    import("./utf16.js"),
+    import("./utf7.js"),
+    import("./sbcs-codec.js"),
+    import("./sbcs-data.js"),
+    import("./sbcs-data-generated.js"),
+    import("./dbcs-codec.js"),
+    import("./dbcs-data.js")
+])
+    .then(res => {
 
 // Put all encoding/alias/codec definitions to single object and export it.
-for (var i = 0; i < modules.length; i++) {
-    var module = modules[i];
-    for (var enc in module)
-        if (Object.prototype.hasOwnProperty.call(module, enc))
-            exports[enc] = module[enc];
-}
+        for (var i = 0; i < res.length; i++) {
+            var module = res[i].default;
+            for (var enc in module) {
+                if (Object.prototype.hasOwnProperty.call(module, enc)) {
+                    encodingList[enc] = module[enc];
+                }
+            }
+        }
+    });
+
+export default encodingList

diff --git a/encodings/internal.js b/encodings/internal.js
index dc1074f0..6d24e368 100644
--- a/encodings/internal.js
+++ b/encodings/internal.js
@@ -1,9 +1,9 @@
 "use strict";
-var Buffer = require("safer-buffer").Buffer;
-
+import pkg from 'safer-buffer'
+const { Buffer } = pkg
 
 // Export Node.js internal encodings.
-module.exports = {
+export default {
     // Encodings
     utf8:   { type: "_internal", bomAware: true},
     cesu8:  { type: "_internal", bomAware: true},
@@ -46,7 +46,7 @@ InternalCodec.prototype.decoder = InternalDecoder;
 //------------------------------------------------------------------------------
 
 // We use node.js internal decoder. Its signature is the same as ours.
-var StringDecoder = require('string_decoder').StringDecoder;
+import { StringDecoder } from "string_decoder"
 
 if (!StringDecoder.prototype.end) // Node v0.8 doesn't have this method.
     StringDecoder.prototype.end = function() {};

diff --git a/encodings/sbcs-codec.js b/encodings/sbcs-codec.js
index abac5ffa..790cc00c 100644
--- a/encodings/sbcs-codec.js
+++ b/encodings/sbcs-codec.js
@@ -1,10 +1,11 @@
 "use strict";
-var Buffer = require("safer-buffer").Buffer;
+import pkg from 'safer-buffer'
+const { Buffer } = pkg
 
 // Single-byte codec. Needs a 'chars' string parameter that contains 256 or 128 chars that
 // correspond to encoded bytes (if 128 - then lower half is ASCII).
 
-exports._sbcs = SBCSCodec;
+const _sbcs = SBCSCodec
 function SBCSCodec(codecOptions, iconv) {
     if (!codecOptions)
         throw new Error("SBCS codec is called without the data.")
@@ -70,3 +71,5 @@ SBCSDecoder.prototype.write = function(buf) {
 
 SBCSDecoder.prototype.end = function() {
 }
+
+export default { _sbcs }

diff --git a/encodings/sbcs-data-generated.js b/encodings/sbcs-data-generated.js
index 9b482360..59cf0ad7 100644
--- a/encodings/sbcs-data-generated.js
+++ b/encodings/sbcs-data-generated.js
@@ -1,7 +1,7 @@
 "use strict";
 
 // Generated data for sbcs codec. Don't edit manually. Regenerate using generation/gen-sbcs.js script.
-module.exports = {
+export default {
   "437": "cp437",
   "737": "cp737",
   "775": "cp775",

diff --git a/encodings/sbcs-data.js b/encodings/sbcs-data.js
index 066f904e..ec21fc6d 100644
--- a/encodings/sbcs-data.js
+++ b/encodings/sbcs-data.js
@@ -2,7 +2,7 @@
 
 // Manually added data to be used by sbcs codec in addition to generated one.
 
-module.exports = {
+export default {
     // Not supported by iconv, not sure why.
     "10029": "maccenteuro",
     "maccenteuro": {

diff --git a/encodings/utf16.js b/encodings/utf16.js
index 97d06692..42b7a47b 100644
--- a/encodings/utf16.js
+++ b/encodings/utf16.js
@@ -1,11 +1,12 @@
 "use strict";
-var Buffer = require("safer-buffer").Buffer;
+import pkg from 'safer-buffer'
+const { Buffer } = pkg
 
 // Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
 
 // == UTF16-BE codec. ==========================================================
 
-exports.utf16be = Utf16BECodec;
+const utf16be = Utf16BECodec
 function Utf16BECodec() {
 }
 
@@ -73,7 +74,7 @@ Utf16BEDecoder.prototype.end = function() {
 
 // Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
 
-exports.utf16 = Utf16Codec;
+const utf16 = Utf16Codec
 function Utf16Codec(codecOptions, iconv) {
     this.iconv = iconv;
 }
@@ -194,4 +195,4 @@ function detectEncoding(bufs, defaultEncoding) {
     return defaultEncoding || 'utf-16le';
 }
 
-
+export default { utf16be, utf16 }

diff --git a/encodings/utf32.js b/encodings/utf32.js
index 2fa900a1..f05a15b3 100644
--- a/encodings/utf32.js
+++ b/encodings/utf32.js
@@ -1,10 +1,11 @@
 'use strict';
 
-var Buffer = require('safer-buffer').Buffer;
+import pkg from 'safer-buffer'
+const { Buffer } = pkg
 
 // == UTF32-LE/BE codec. ==========================================================
 
-exports._utf32 = Utf32Codec;
+const _utf32 = Utf32Codec
 
 function Utf32Codec(codecOptions, iconv) {
     this.iconv = iconv;
@@ -12,12 +13,12 @@ function Utf32Codec(codecOptions, iconv) {
     this.isLE = codecOptions.isLE;
 }
 
-exports.utf32le = { type: '_utf32', isLE: true };
-exports.utf32be = { type: '_utf32', isLE: false };
+const utf32le = { type: '_utf32', isLE: true }
+const utf32be = { type: '_utf32', isLE: false }
 
 // Aliases
-exports.ucs4le = 'utf32le';
-exports.ucs4be = 'utf32be';
+const ucs4le = 'utf32le'
+const ucs4be = 'utf32be'
 
 Utf32Codec.prototype.encoder = Utf32Encoder;
 Utf32Codec.prototype.decoder = Utf32Decoder;
@@ -189,8 +190,8 @@ Utf32Decoder.prototype.end = function() {
 
 // Encoder prepends BOM (which can be overridden with (addBOM: false}).
 
-exports.utf32 = Utf32AutoCodec;
-exports.ucs4 = 'utf32';
+const utf32 = Utf32AutoCodec
+const ucs4 = 'utf32'
 
 function Utf32AutoCodec(options, iconv) {
     this.iconv = iconv;
@@ -317,3 +318,5 @@ function detectEncoding(bufs, defaultEncoding) {
     // Couldn't decide (likely all zeros or not enough data).
     return defaultEncoding || 'utf-32le';
 }
+
+export default { _utf32, utf32le, utf32be, ucs4le, ucs4be, utf32, ucs4 }

diff --git a/encodings/utf7.js b/encodings/utf7.js
index eacae34d..38cbfd3f 100644
--- a/encodings/utf7.js
+++ b/encodings/utf7.js
@@ -1,11 +1,12 @@
 "use strict";
-var Buffer = require("safer-buffer").Buffer;
+import pkg from 'safer-buffer'
+const { Buffer } = pkg
 
 // UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
 // See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
 
-exports.utf7 = Utf7Codec;
-exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
+const utf7 = Utf7Codec
+const unicode11utf7 = 'utf7' // Alias UNICODE-1-1-UTF-7
 function Utf7Codec(codecOptions, iconv) {
     this.iconv = iconv;
 };
@@ -129,7 +130,7 @@ Utf7Decoder.prototype.end = function() {
 
 // * "-&" while in base64 is not allowed.
 
-exports.utf7imap = Utf7IMAPCodec;
+const utf7imap = Utf7IMAPCodec
 function Utf7IMAPCodec(codecOptions, iconv) {
     this.iconv = iconv;
 };
@@ -287,4 +288,4 @@ Utf7IMAPDecoder.prototype.end = function() {
     return res;
 }
 
-
+export default { utf7, unicode11utf7, utf7imap }

diff --git a/lib/bom-handling.js b/lib/bom-handling.js
index 10508723..191f12c7 100644
--- a/lib/bom-handling.js
+++ b/lib/bom-handling.js
@@ -2,7 +2,7 @@
 
 var BOMChar = '\uFEFF';
 
-exports.PrependBOM = PrependBOMWrapper
+const PrependBOM = PrependBOMWrapper
 function PrependBOMWrapper(encoder, options) {
     this.encoder = encoder;
     this.addBOM = true;
@@ -24,7 +24,7 @@ PrependBOMWrapper.prototype.end = function() {
 
 //------------------------------------------------------------------------------
 
-exports.StripBOM = StripBOMWrapper;
+const StripBOM = StripBOMWrapper
 function StripBOMWrapper(decoder, options) {
     this.decoder = decoder;
     this.pass = false;
@@ -50,3 +50,4 @@ StripBOMWrapper.prototype.end = function() {
     return this.decoder.end();
 }
 
+export default { PrependBOM, StripBOM }

diff --git a/lib/index.js b/lib/index.js
index 657701c3..38c90ece 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -1,9 +1,12 @@
 "use strict";
 
-var Buffer = require("safer-buffer").Buffer;
-
-var bomHandling = require("./bom-handling"),
-    iconv = module.exports;
+import * as stream from "stream"
+import pkg from 'safer-buffer'
+import * as encodings from '../encodings/index.js'
+import bomHandling from './bom-handling.js'
+import streamFun from "./streams.js"
+const { Buffer } = pkg
+const iconv = {}
 
 // All codecs and aliases are kept here, keyed by encoding name/alias.
 // They are lazy loaded in `iconv.getCodec` from `encodings/index.js`.
@@ -60,7 +63,7 @@ iconv.fromEncoding = iconv.decode;
 iconv._codecDataCache = {};
 iconv.getCodec = function getCodec(encoding) {
     if (!iconv.encodings)
-        iconv.encodings = require("../encodings"); // Lazy load all encoding definitions.
+        iconv.encodings = encodings.default // Lazy load all encoding definitions.
 
     // Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
     var enc = iconv._canonicalizeEncoding(encoding);
@@ -141,7 +144,7 @@ iconv.enableStreamingAPI = function enableStreamingAPI(stream_module) {
         return;
 
     // Dependency-inject stream module to create IconvLite stream classes.
-    var streams = require("./streams")(stream_module);
+    var streams = streamFun(stream_module)
 
     // Not public API yet, but expose the stream classes.
     iconv.IconvLiteEncoderStream = streams.IconvLiteEncoderStream;
@@ -162,7 +165,7 @@ iconv.enableStreamingAPI = function enableStreamingAPI(stream_module) {
 // Enable Streaming API automatically if 'stream' module is available and non-empty (the majority of environments).
 var stream_module;
 try {
-    stream_module = require("stream");
+    stream_module = stream
 } catch (e) {}
 
 if (stream_module && stream_module.Transform) {
@@ -178,3 +181,5 @@ if (stream_module && stream_module.Transform) {
 if ("Ā" != "\u0100") {
     console.error("iconv-lite warning: js files use non-utf8 encoding. See https://github.com/ashtuchkin/iconv-lite/wiki/Javascript-source-file-encodings for more info.");
 }
+
+export { iconv }
\ No newline at end of file

diff --git a/lib/streams.js b/lib/streams.js
index a1506482..0f53d699 100644
--- a/lib/streams.js
+++ b/lib/streams.js
@@ -1,10 +1,11 @@
 "use strict";
 
-var Buffer = require("safer-buffer").Buffer;
+import pkg from 'safer-buffer'
+const { Buffer } = pkg
 
 // NOTE: Due to 'stream' module being pretty large (~100Kb, significant in browser environments),
 // we opt to dependency-inject it instead of creating a hard dependency.
-module.exports = function(stream_module) {
+export default function streamFun (stream_module) {
     var Transform = stream_module.Transform;
 
     // == Encoder stream =======================================================

diff --git a/package.json b/package.json
index d351115a..702abd86 100644
--- a/package.json
+++ b/package.json
@@ -11,6 +11,7 @@
     ],
     "author": "Alexander Shtuchkin <ashtuchkin@gmail.com>",
     "main": "./lib/index.js",
+    "type": "module",
    "typings": "./lib/index.d.ts",
     "homepage": "https://github.com/ashtuchkin/iconv-lite",
     "bugs": "https://github.com/ashtuchkin/iconv-lite/issues",
@@ -30,12 +31,12 @@
     },
     "devDependencies": {
         "async": "^3.2.0",
-        "c8": "^7.2.0",
+        "c8": "^7.8.0",
         "errto": "^0.2.1",
         "iconv": "^2.3.5",
-        "mocha": "^3.5.3",
+        "mocha": "^9.1.0",
         "request": "^2.88.2",
-        "semver": "^6.3.0",
+        "semver": "^7.3.0",
         "unorm": "^1.6.0"
     },
     "dependencies": {

diff --git a/test/big5-test.js b/test/big5-test.js
index 1ca9dd91..24f90367 100644
--- a/test/big5-test.js
+++ b/test/big5-test.js
@@ -1,6 +1,8 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname + '/../');
+import assert from 'assert'
+import pkg from 'safer-buffer'
+import { Iconv } from 'iconv'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 var testString = "中文abc", //unicode contains Big5-code and ascii
     testStringBig5Buffer = Buffer.from([0xa4,0xa4,0xa4,0xe5,0x61,0x62,0x63]),
@@ -23,7 +25,7 @@ describe("Big5 tests", function() {
     it("Big5 file read decoded,compare with iconv result", function() {
         var contentBuffer = Buffer.from('PEhUTUw+DQo8SEVBRD4gICAgDQoJPFRJVExFPiBtZXRhILzQxdKquqjPpc6hR6SkpOW69K22IDwvVElUTEU+DQoJPG1ldGEgSFRUUC1FUVVJVj0iQ29udGVudC1UeXBlIiBDT05URU5UPSJ0ZXh0L2h0bWw7IGNoYXJzZXQ9YmlnNSI+DQo8L0hFQUQ+DQo8Qk9EWT4NCg0Ks2+sT6RArdPBY8XppKSk5br0rbahSTxicj4NCihUaGlzIHBhZ2UgdXNlcyBiaWc1IGNoYXJhY3RlciBzZXQuKTxicj4NCmNoYXJzZXQ9YmlnNQ0KDQo8L0JPRFk+DQo8L0hUTUw+', 'base64');
         var str = iconv.decode(contentBuffer, "big5");
-        var iconvc = new (require('iconv').Iconv)('big5','utf8');
+        var iconvc = new Iconv('big5','utf8')
         assert.strictEqual(iconvc.convert(contentBuffer).toString(), str);
     });

diff --git a/test/bom-test.js b/test/bom-test.js
index 51e84939..94bd04f5 100644
--- a/test/bom-test.js
+++ b/test/bom-test.js
@@ -1,8 +1,9 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
-
-var sampleStr = '<?xml version="1.0" encoding="UTF-8"?>\n<俄语>данные';
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
+
+var sampleStr = '<?xml version="1.0" encoding="UTF-8"?>\n<俄语>данные',
     strBOM = '\ufeff',
     utf8BOM = Buffer.from([0xEF, 0xBB, 0xBF]),
     utf16beBOM = Buffer.from([0xFE, 0xFF]),

diff --git a/test/cesu8-test.js b/test/cesu8-test.js
index c4c2d3ac..d98710b9 100644
--- a/test/cesu8-test.js
+++ b/test/cesu8-test.js
@@ -1,6 +1,7 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 describe("CESU-8 codec", function() {
     it("encodes correctly", function() {

diff --git a/test/cyrillic-test.js b/test/cyrillic-test.js
index bcbc261c..ddb54fed 100644
--- a/test/cyrillic-test.js
+++ b/test/cyrillic-test.js
@@ -1,6 +1,7 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 var baseStrings = {
     empty: "",

diff --git a/test/dbcs-test.js b/test/dbcs-test.js
index 3eadfb5c..aaa2b14a 100644
--- a/test/dbcs-test.js
+++ b/test/dbcs-test.js
@@ -1,8 +1,8 @@
-var fs = require('fs'),
-    assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../'),
-    Iconv = require('iconv').Iconv;
+import assert from 'assert'
+import pkg from 'safer-buffer'
+import { Iconv } from 'iconv'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 // Make all valid input combinations for a given encoding and call fn with it.

diff --git a/test/gbk-test.js b/test/gbk-test.js
index 224b896d..39632f04 100644
--- a/test/gbk-test.js
+++ b/test/gbk-test.js
@@ -1,9 +1,11 @@
-var fs = require('fs'),
-    assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
-
-var testString = "中国abc",//unicode contains GBK-code and ascii
+import fs from 'fs'
+import assert from 'assert'
+import pkg from 'safer-buffer'
+import {Iconv} from 'iconv'
+const {iconv} = await import('../lib/index.js')
+const { Buffer } = pkg
+
+const testString = "中国abc",//unicode contains GBK-code and ascii
     testStringGBKBuffer = Buffer.from([0xd6,0xd0,0xb9,0xfa,0x61,0x62,0x63]);
 
 describe("GBK tests", function() {
@@ -18,9 +20,9 @@ describe("GBK tests", function() {
     });
 
     it("GBK file read decoded,compare with iconv result", function() {
-        var contentBuffer = fs.readFileSync(__dirname+"/gbkFile.txt");
+        var contentBuffer = fs.readFileSync("./test/gbkFile.txt");
         var str = iconv.decode(contentBuffer, "GBK");
-        var iconvc = new (require('iconv').Iconv)('GBK','utf8');
+        var iconvc = new Iconv('GBK','utf8');
         assert.strictEqual(iconvc.convert(contentBuffer).toString(), str);
     });

diff --git a/test/greek-test.js b/test/greek-test.js
index b73b552d..6de0b90f 100644
--- a/test/greek-test.js
+++ b/test/greek-test.js
@@ -1,6 +1,7 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const {iconv} = await import('../lib/index.js')
+const { Buffer } = pkg
 
 var baseStrings = {
     empty: "",

diff --git a/test/main-test.js b/test/main-test.js
index 060bd91d..d7bb5c7a 100644
--- a/test/main-test.js
+++ b/test/main-test.js
@@ -1,7 +1,8 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
-
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
+
 var testString = "Hello123!";
 var testStringLatin1 = "Hello123!£Å÷×çþÿ¿®";
 var testStringBase64 = "SGVsbG8xMjMh";

diff --git a/test/performance.js b/test/performance.js
deleted file mode 100644
index 1a5bcf4d..00000000
--- a/test/performance.js
+++ /dev/null
@@ -1,70 +0,0 @@
-
-if (module.parent) // Skip this file from testing.
-    return;
-
-var iconv = require('iconv');
-var iconv_lite = require("../");
-
-var encoding = process.argv[2] || "windows-1251";
-var convertTimes = 10000;
-
-var encodingStrings = {
-    'windows-1251': 'This is a test string 32 chars..',
-    'gbk': '这是中文字符测试。。!@¥%12',
-    'utf8': '这是中文字符测试。。!@¥%12This is a test string 48 chars..',
-};
-// Test encoding.
-var str = encodingStrings[encoding];
-if (!str) {
-    throw new Error('Don\'t support ' + encoding + ' performance test.');
-}
-for (var i = 0; i < 13; i++) {
-    str = str + str;
-}
-
-console.log('\n' + encoding + ' charset performance test:');
-console.log("\nEncoding "+str.length+" chars "+convertTimes+" times:");
-
-var start = Date.now();
-var converter = new iconv.Iconv("utf8", encoding);
-for (var i = 0; i < convertTimes; i++) {
-    var b = converter.convert(str);
-}
-var duration = Date.now() - start;
-var mbs = convertTimes*b.length/duration/1024;
-
-console.log("iconv: "+duration+"ms, "+mbs.toFixed(2)+" Mb/s.");
-
-var start = Date.now();
-for (var i = 0; i < convertTimes; i++) {
-    var b = iconv_lite.encode(str, encoding);
-}
-var duration = Date.now() - start;
-var mbs = convertTimes*b.length/duration/1024;
-
-console.log("iconv-lite: "+duration+"ms, "+mbs.toFixed(2)+" Mb/s.");
-
-
-// Test decoding.
-var buf = iconv_lite.encode(str, encoding);
-console.log("\nDecoding "+buf.length+" bytes "+convertTimes+" times:");
-
-var start = Date.now();
-var converter = new iconv.Iconv(encoding, "utf8");
-for (var i = 0; i < convertTimes; i++) {
-    var s = converter.convert(buf).toString();
-}
-var duration = Date.now() - start;
-var mbs = convertTimes*buf.length/duration/1024;
-
-console.log("iconv: "+duration+"ms, "+mbs.toFixed(2)+" Mb/s.");
-
-var start = Date.now();
-for (var i = 0; i < convertTimes; i++) {
-    var s = iconv_lite.decode(buf, encoding);
-}
-var duration = Date.now() - start;
-var mbs = convertTimes*buf.length/duration/1024;
-
-console.log("iconv-lite: "+duration+"ms, "+mbs.toFixed(2)+" Mb/s.");
-

diff --git a/test/performance/index.js b/test/performance/index.js
new file mode 100644
index 00000000..5906fb00
--- /dev/null
+++ b/test/performance/index.js
@@ -0,0 +1,68 @@
+// This benchmark lives under test/performance/ so mocha's default test glob skips it.
+// package.json now sets "type": "module", so use ESM imports instead of require().
+
+import iconv from 'iconv';
+const { iconv: iconv_lite } = await import('../../lib/index.js');
+
+var encoding = process.argv[2] || "windows-1251";
+var convertTimes = 10000;
+
+var encodingStrings = {
+    'windows-1251': 'This is a test string 32 chars..',
+    'gbk': '这是中文字符测试。。!@¥%12',
+    'utf8': '这是中文字符测试。。!@¥%12This is a test string 48 chars..',
+};
+// Test encoding.
+var str = encodingStrings[encoding];
+if (!str) {
+    throw new Error('Don\'t support ' + encoding + ' performance test.');
+}
+for (var i = 0; i < 13; i++) {
+    str = str + str;
+}
+
+console.log('\n' + encoding + ' charset performance test:');
+console.log("\nEncoding " + str.length + " chars " + convertTimes + " times:");
+
+var start = Date.now();
+var converter = new iconv.Iconv("utf8", encoding);
+for (var i = 0; i < convertTimes; i++) {
+    var b = converter.convert(str);
+}
+var duration = Date.now() - start;
+var mbs = convertTimes * b.length / duration / 1024;
+
+console.log("iconv: " + duration + "ms, " + mbs.toFixed(2) + " Mb/s.");
+
+var start = Date.now();
+for (var i = 0; i < convertTimes; i++) {
+    var b = iconv_lite.encode(str, encoding);
+}
+var duration = Date.now() - start;
+var mbs = convertTimes * b.length / duration / 1024;
+
+console.log("iconv-lite: " + duration + "ms, " + mbs.toFixed(2) + " Mb/s.");
+
+
+// Test decoding.
+var buf = iconv_lite.encode(str, encoding);
+console.log("\nDecoding " + buf.length + " bytes " + convertTimes + " times:");
+
+var start = Date.now();
+var converter = new iconv.Iconv(encoding, "utf8");
+for (var i = 0; i < convertTimes; i++) {
+    var s = converter.convert(buf).toString();
+}
+var duration = Date.now() - start;
+var mbs = convertTimes * buf.length / duration / 1024;
+
+console.log("iconv: " + duration + "ms, " + mbs.toFixed(2) + " Mb/s.");
+
+var start = Date.now();
+for (var i = 0; i < convertTimes; i++) {
+    var s = iconv_lite.decode(buf, encoding);
+}
+var duration = Date.now() - start;
+var mbs = convertTimes * buf.length / duration / 1024;
+
+console.log("iconv-lite: " + duration + "ms, " + mbs.toFixed(2) + " Mb/s.");

diff --git a/test/sbcs-test.js b/test/sbcs-test.js
index 46f419be..cf1ffcaf 100644
--- a/test/sbcs-test.js
+++ b/test/sbcs-test.js
@@ -1,8 +1,9 @@
-var assert = require('assert'),
-    unorm = require('unorm'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../'),
-    Iconv = require('iconv').Iconv;
+import assert from 'assert'
+import unorm from 'unorm'
+import pkg from 'safer-buffer'
+import {Iconv} from 'iconv'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 function convertWithDefault(converter, buf, def) {
     var res = converter.convert(buf);

diff --git a/test/shiftjis-test.js b/test/shiftjis-test.js
index a4d04753..b0bdcb26 100644
--- a/test/shiftjis-test.js
+++ b/test/shiftjis-test.js
@@ -1,6 +1,7 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname + '/../');
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 describe("ShiftJIS tests", function() {
     it("ShiftJIS correctly encoded/decoded", function() {

diff --git a/test/streams-test.js b/test/streams-test.js
index 70f98f84..55d8ca92 100644
--- a/test/streams-test.js
+++ b/test/streams-test.js
@@ -1,32 +1,19 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    semver = require('semver'),
-    iconv = require(__dirname+'/../');
-
-if (!iconv.supportsStreams)
-    return;
-
-var Readable = require('stream').Readable;
-
-// If the two chunks form a surrogate pair, merge them (both bounds are inclusive).
-function formatSurrogate(arr = []) {
-    let h = arr[0],
-        l = arr[1];
-    if ('\uD800' <= h && h <= '\uDBFF' && '\uDC00' <= l && l <= '\uDFFF') {
-        return [h + l];
-    }
-    return arr;
-}
+import assert from 'assert'
+import semver from 'semver'
+import pkg from 'safer-buffer'
+import { Readable } from 'stream'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
+
+if (!iconv.supportsStreams) throw new Error('Streams are not supported in this environment.')
 
 // Create a source stream that feeds given array of chunks.
 function feeder(chunks) {
     if (!Array.isArray(chunks))
         chunks = [chunks];
     var opts = {};
-    if (chunks.every(function(chunk) {return typeof chunk == 'string'})) {
-        opts.encoding = 'utf8';
-        chunks = formatSurrogate(chunks);
-    }
+    if (chunks.every(function(chunk) {return typeof chunk == 'string'}))
+        opts.encoding = 'utf8';
 
     var stream = new Readable(opts);
     function writeChunk() {
@@ -147,12 +134,6 @@ function checkDecodeStream(opts) {
 }
 
 describe("Streaming mode", function() {
-    it("Encoding using internal modules: utf8 with surrogates in separate chunks", checkEncodeStream({
-        encoding: "utf8",
-        input: ["\uD83D", "\uDE3B"],
-        output: "f09f98bb",
-    }));
-
     it("Feeder outputs strings", checkStreamOutput({
         createStream: function() { return feeder(["abc", "def"]); },
         outputType: 'string',

diff --git a/test/turkish-test.js b/test/turkish-test.js
index 5412dcf9..c5780514 100644
--- a/test/turkish-test.js
+++ b/test/turkish-test.js
@@ -1,6 +1,7 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 var ascii = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'+
     ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f';

diff --git a/test/utf16-test.js b/test/utf16-test.js
index cb74b998..abecca9a 100644
--- a/test/utf16-test.js
+++ b/test/utf16-test.js
@@ -1,8 +1,9 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
-
-var testStr = "1aя中文☃💩";
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
+
+var testStr = "1aя中文☃💩",
     utf16beBuf = Buffer.from([0, 0x31, 0, 0x61, 0x04, 0x4f, 0x4e, 0x2d, 0x65, 0x87, 0x26, 0x03, 0xd8, 0x3d, 0xdc, 0xa9]),
     utf16leBuf = Buffer.from(testStr, 'ucs2'),
     utf16beBOM = Buffer.from([0xFE, 0xFF]),

diff --git a/test/utf32-test.js b/test/utf32-test.js
index 6da093e5..04cb9c85 100644
--- a/test/utf32-test.js
+++ b/test/utf32-test.js
@@ -1,7 +1,8 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../'),
-    Iconv = require('iconv').Iconv;
+import assert from 'assert'
+import pkg from 'safer-buffer'
+import { Iconv } from 'iconv'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 var testStr = '1aя中文☃💩',
     testStr2 = '❝Stray high \uD977😱 and low\uDDDD☔ surrogate values.❞',

diff --git a/test/utf7-test.js b/test/utf7-test.js
index 37d7cfaf..88253c21 100644
--- a/test/utf7-test.js
+++ b/test/utf7-test.js
@@ -1,6 +1,7 @@
-var assert = require('assert'),
-    Buffer = require('safer-buffer').Buffer,
-    iconv = require(__dirname+'/../');
+import assert from 'assert'
+import pkg from 'safer-buffer'
+const { iconv } = await import('../lib/index.js')
+const { Buffer } = pkg
 
 // These tests are mostly from https://github.com/kkaefer/utf7
 // In case of ambiguity, we do the same as iconv. For example, we encode "optional direct" characters, but leave spaces and \n\r\t as-is.
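A note on the table loaders introduced above: fs.readFileSync('./encodings/tables/...') resolves against process.cwd() rather than the package directory, so the tables only load when Node is started from the repository root. A minimal sketch of a cwd-independent variant, assuming Node.js >= 12.17 with ESM support; the helper name readJsonFile anticipates the one introduced in the next patch, but the import.meta.url resolution shown here is not part of this series:

    import fs from 'fs';
    import { fileURLToPath } from 'url';

    // Resolve table paths relative to this module, not the working directory.
    function readJsonFile(relPath) {
        const absPath = fileURLToPath(new URL(relPath, import.meta.url));
        return JSON.parse(fs.readFileSync(absPath, 'utf8'));
    }

    // e.g. inside encodings/dbcs-data.js:
    //   table: () => readJsonFile('./tables/shiftjis.json')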
From 2b5c02fb07f56d6cc6289449cd259ff85962d6fe Mon Sep 17 00:00:00 2001
From: yosion-p
Date: Tue, 7 Sep 2021 10:05:15 +0800
Subject: [PATCH 4/6] refactor: merge the JSON file reads into one helper

---
 encodings/dbcs-data.js | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/encodings/dbcs-data.js b/encodings/dbcs-data.js
index 990864cd..b7c64e4b 100644
--- a/encodings/dbcs-data.js
+++ b/encodings/dbcs-data.js
@@ -5,6 +5,15 @@ import fs from 'fs';
 // Tables are not require()-d until they are needed to speed up library load.
 // require()-s are direct to support Browserify.
 
+/**
+ * Read and parse one of the JSON table files.
+ * @param {String} url
+ * @returns {Array}
+ */
+function readJsonFile (url) {
+    return JSON.parse(fs.readFileSync(url, 'utf8'))
+}
+
 export default {
 
     // == Japanese/ShiftJIS ====================================================
@@ -41,7 +50,7 @@ export default {
 
     'shiftjis': {
         type: '_dbcs',
-        table: () => JSON.parse(fs.readFileSync('./encodings/tables/shiftjis.json', 'utf8')),
+        table: () => readJsonFile('./encodings/tables/shiftjis.json'),
         encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
         encodeSkipVals: [{from: 0xED40, to: 0xF940}],
     },
@@ -58,7 +67,7 @@ export default {
 
     'eucjp': {
         type: '_dbcs',
-        table: () => JSON.parse(fs.readFileSync('./encodings/tables/eucjp.json', 'utf8')),
+        table: () => readJsonFile('./encodings/tables/eucjp.json'),
         encodeAdd: {'\u00a5': 0x5C, '\u203E': 0x7E},
     },
@@ -85,14 +94,13 @@ export default {
     '936': 'cp936',
     'cp936': {
         type: '_dbcs',
-        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp936.json', 'utf8'))
+        table: () => readJsonFile('./encodings/tables/cp936.json')
     },
 
     // GBK (~22000 chars) is an extension of CP936 that added user-mapped chars and some other.
     'gbk': {
         type: '_dbcs',
-        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp936.json', 'utf8'))
-            .concat(JSON.parse(fs.readFileSync('./encodings/tables/gbk-added.json', 'utf8')))
+        table: () => readJsonFile('./encodings/tables/cp936.json').concat(readJsonFile('./encodings/tables/gbk-added.json'))
     },
     'xgbk': 'gbk',
     'isoir58': 'gbk',
@@ -104,9 +112,8 @@ export default {
     // http://www.khngai.com/chinese/charmap/tblgbk.php?page=0
     'gb18030': {
         type: '_dbcs',
-        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp936.json', 'utf8'))
-            .concat(JSON.parse(fs.readFileSync('./encodings/tables/gbk-added.json', 'utf8'))),
-        gb18030: () => JSON.parse(fs.readFileSync('./encodings/tables/gb18030-ranges.json', 'utf8')),
+        table: () => readJsonFile('./encodings/tables/cp936.json').concat(readJsonFile('./encodings/tables/gbk-added.json')),
+        gb18030: () => readJsonFile('./encodings/tables/gb18030-ranges.json'),
         encodeSkipVals: [0x80],
         encodeAdd: {'€': 0xA2E3},
     },
@@ -121,7 +128,7 @@ export default {
     '949': 'cp949',
     'cp949': {
         type: '_dbcs',
-        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp949.json', 'utf8'))
+        table: () => readJsonFile('./encodings/tables/cp949.json')
     },
     'cseuckr': 'cp949',
@@ -162,15 +169,14 @@ export default {
     '950': 'cp950',
     'cp950': {
         type: '_dbcs',
-        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp950.json', 'utf8')),
+        table: () => readJsonFile('./encodings/tables/cp950.json'),
     },
 
     // Big5 has many variations and is an extension of cp950. We use Encoding Standard's as a consensus.
     'big5': 'big5hkscs',
     'big5hkscs': {
         type: '_dbcs',
-        table: () => JSON.parse(fs.readFileSync('./encodings/tables/cp950.json', 'utf8'))
-            .concat(JSON.parse(fs.readFileSync('./encodings/tables/big5-added.json', 'utf8'))),
+        table: () => readJsonFile('./encodings/tables/cp950.json').concat(readJsonFile('./encodings/tables/big5-added.json')),
         encodeSkipVals: [
             // Although Encoding Standard says we should avoid encoding to HKSCS area (See Step 1 of
             // https://encoding.spec.whatwg.org/#index-big5-pointer), we still do it to increase compatibility with ICU.

From 3716f0f06395d5f76023348e962305b6c66291c2 Mon Sep 17 00:00:00 2001
From: yosion-p
Date: Tue, 7 Sep 2021 20:25:55 +0800
Subject: [PATCH 5/6] add: CI build

---
 .github/workflows/node.js.yml | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 .github/workflows/node.js.yml

diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml
new file mode 100644
index 00000000..2822cfae
--- /dev/null
+++ b/.github/workflows/node.js.yml
@@ -0,0 +1,30 @@
+# This workflow will do a clean install of node dependencies, cache/restore them, build the source code and run tests across different versions of node
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
+
+name: Node.js CI
+
+on:
+  push:
+#    branches: [ master ]
+  pull_request:
+#    branches: [ master ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        node-version: [12.x, 14.x, 16.x]
+        # ESM and mocha 9 need Node.js 12+; see the supported release schedule at https://nodejs.org/en/about/releases/
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Use Node.js ${{ matrix.node-version }}
+      uses: actions/setup-node@v2
+      with:
+        node-version: ${{ matrix.node-version }}
+
+    - run: cd test/webpack && npm run preinstall && npm install
+    - run: npm i && npm test
\ No newline at end of file

From 778fa1c31af549f9efcf6bfa6ef8618b290261ec Mon Sep 17 00:00:00 2001
From: yosion-p <86145334+yosion-p@users.noreply.github.com>
Date: Thu, 9 Sep 2021 09:33:02 +0800
Subject: [PATCH 6/6] ci-test-210909

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 92f1ed57..acbf3679 100644
--- a/README.md
+++ b/README.md
@@ -128,3 +128,5 @@ $ # To view test coverage:
 $ npm run coverage
 $ open coverage/lcov-report/index.html
 ```
+
+ci-test-210909
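The chunk-boundary case that motivated patch 1 can still be exercised by hand against the streaming API. A minimal sketch, assuming the package is importable as iconv-lite and that iconv.supportsStreams is true; whether the split pair comes out as the single well-formed UTF-8 sequence f09f98bb (U+1F63B) is exactly what the test removed again in patch 3 probed:

    import iconv from 'iconv-lite';

    const encoder = iconv.encodeStream('utf8');
    const parts = [];
    encoder.on('data', (buf) => parts.push(buf));
    encoder.on('end', () => {
        // Expect 'f09f98bb' if the surrogate pair survives the chunk boundary.
        console.log(Buffer.concat(parts).toString('hex'));
    });
    encoder.write('\uD83D'); // high surrogate in one chunk
    encoder.write('\uDE3B'); // low surrogate in the next chunk
    encoder.end();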