From c5ee34e39b2e855bd70f83c83120c20e1c5a5f41 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Sun, 30 Jul 2017 18:09:13 +0800 Subject: [PATCH] encoding: rudimentary TextDecoder support w/o ICU Also split up the tests. Backport-PR-URL: https://github.com/nodejs/node/pull/14786 Backport-Reviewed-By: Anna Henningsen PR-URL: https://github.com/nodejs/node/pull/14489 Reviewed-By: James M Snell Reviewed-By: Refael Ackermann --- doc/api/errors.md | 7 + doc/api/intl.md | 2 +- doc/api/util.md | 61 +-- lib/internal/encoding.js | 366 ++++++++++++------ lib/internal/errors.js | 1 + ...g.js => test-whatwg-encoding-internals.js} | 119 +----- .../test-whatwg-encoding-textdecoder.js | 104 +++++ .../test-whatwg-encoding-textencoder.js | 36 ++ 8 files changed, 428 insertions(+), 268 deletions(-) rename test/parallel/{test-whatwg-encoding.js => test-whatwg-encoding-internals.js} (56%) create mode 100644 test/parallel/test-whatwg-encoding-textdecoder.js create mode 100644 test/parallel/test-whatwg-encoding-textencoder.js diff --git a/doc/api/errors.md b/doc/api/errors.md index da63708dd49923..d9ddc299c9d9df 100644 --- a/doc/api/errors.md +++ b/doc/api/errors.md @@ -712,6 +712,12 @@ only used in the [WHATWG URL API][] for strict compliance with the specification native Node.js APIs, `func(undefined)` and `func()` are treated identically, and the [`ERR_INVALID_ARG_TYPE`][] error code may be used instead. + +### ERR_NO_ICU + +Used when an attempt is made to use features that require [ICU][], while +Node.js is not compiled with ICU support. + ### ERR_SOCKET_ALREADY_BOUND Used when an attempt is made to bind a socket that has already been bound. @@ -795,6 +801,7 @@ are most likely an indication of a bug within Node.js itself. 
[`new URLSearchParams(iterable)`]: url.html#url_constructor_new_urlsearchparams_iterable [`process.on('uncaughtException')`]: process.html#process_event_uncaughtexception [`process.send()`]: process.html#process_process_send_message_sendhandle_options_callback +[ICU]: intl.html#intl_internationalization_support [Node.js Error Codes]: #nodejs-error-codes [V8's stack trace API]: https://github.com/v8/v8/wiki/Stack-Trace-API [WHATWG URL API]: url.html#url_the_whatwg_url_api diff --git a/doc/api/intl.md b/doc/api/intl.md index 753932c91c25f7..faa5429bffdf06 100644 --- a/doc/api/intl.md +++ b/doc/api/intl.md @@ -52,7 +52,7 @@ option: | [WHATWG URL Parser][] | partial (no IDN support) | full | full | full | [`require('buffer').transcode()`][] | none (function does not exist) | full | full | full | [REPL][] | partial (inaccurate line editing) | full | full | full -| [`require('util').TextDecoder`][] | none (class does not exist) | partial/full (depends on OS) | partial (Unicode-only) | full +| [`require('util').TextDecoder`][] | partial (basic encodings support) | partial/full (depends on OS) | partial (Unicode-only) | full *Note*: The "(not locale-aware)" designation denotes that the function carries out its operation just like the non-`Locale` version of the function, if one diff --git a/doc/api/util.md b/doc/api/util.md index c03b3f769d78da..ee32211d9bbdd4 100644 --- a/doc/api/util.md +++ b/doc/api/util.md @@ -544,7 +544,7 @@ added: v8.0.0 A Symbol that can be used to declare custom promisified variants of functions, see [Custom promisified functions][]. -### Class: util.TextDecoder +## Class: util.TextDecoder @@ -563,23 +563,33 @@ while (buffer = getNextChunkSomehow()) { string += decoder.decode(); // end-of-stream ``` -#### WHATWG Supported Encodings +### WHATWG Supported Encodings Per the [WHATWG Encoding Standard][], the encodings supported by the `TextDecoder` API are outlined in the tables below. For each encoding, -one or more aliases may be used. 
Support for some encodings is enabled -only when Node.js is using the full ICU data (see [Internationalization][]). -`util.TextDecoder` is `undefined` when ICU is not enabled during build. +one or more aliases may be used. -##### Encodings Supported By Default +Different Node.js build configurations support different sets of encodings. +While a very basic set of encodings is supported even on Node.js builds without +ICU enabled, support for some encodings is provided only when Node.js is built +with ICU and using the full ICU data (see [Internationalization][]). + +#### Encodings Supported Without ICU | Encoding | Aliases | | ----------- | --------------------------------- | -| `'utf8'` | `'unicode-1-1-utf-8'`, `'utf-8'` | -| `'utf-16be'`| | +| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` | | `'utf-16le'`| `'utf-16'` | -##### Encodings Requiring Full-ICU +#### Encodings Supported by Default (With ICU) + +| Encoding | Aliases | +| ----------- | --------------------------------- | +| `'utf-8'` | `'unicode-1-1-utf-8'`, `'utf8'` | +| `'utf-16le'`| `'utf-16'` | +| `'utf-16be'`| | + +#### Encodings Requiring Full ICU Data | Encoding | Aliases | | ----------------- | -------------------------------- | @@ -621,13 +631,14 @@ only when Node.js is using the full ICU data (see [Internationalization][]). *Note*: The `'iso-8859-16'` encoding listed in the [WHATWG Encoding Standard][] is not supported. -#### new TextDecoder([encoding[, options]]) +### new TextDecoder([encoding[, options]]) * `encoding` {string} Identifies the `encoding` that this `TextDecoder` instance supports. Defaults to `'utf-8'`. * `options` {Object} * `fatal` {boolean} `true` if decoding failures are fatal. Defaults to - `false`. + `false`. This option is only supported when ICU is enabled (see + [Internationalization][]). * `ignoreBOM` {boolean} When `true`, the `TextDecoder` will include the byte order mark in the decoded result. When `false`, the byte order mark will be removed from the output. 
This option is only used when `encoding` is @@ -636,7 +647,7 @@ is not supported. Creates an new `TextDecoder` instance. The `encoding` may specify one of the supported encodings or an alias. -#### textDecoder.decode([input[, options]]) +### textDecoder.decode([input[, options]]) * `input` {ArrayBuffer|DataView|TypedArray} An `ArrayBuffer`, `DataView` or Typed Array instance containing the encoded data. @@ -652,27 +663,27 @@ internally and emitted after the next call to `textDecoder.decode()`. If `textDecoder.fatal` is `true`, decoding errors that occur will result in a `TypeError` being thrown. -#### textDecoder.encoding +### textDecoder.encoding -* Value: {string} +* {string} The encoding supported by the `TextDecoder` instance. -#### textDecoder.fatal +### textDecoder.fatal -* Value: {boolean} +* {boolean} The value will be `true` if decoding errors result in a `TypeError` being thrown. -#### textDecoder.ignoreBOM +### textDecoder.ignoreBOM -* Value: {boolean} +* {boolean} The value will be `true` if the decoding result will include the byte order mark. -### Class: util.TextEncoder +## Class: util.TextEncoder @@ -680,21 +691,27 @@ added: v8.3.0 > Stability: 1 - Experimental An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All -instances of `TextEncoder` only support `UTF-8` encoding. +instances of `TextEncoder` only support UTF-8 encoding. ```js const encoder = new TextEncoder(); const uint8array = encoder.encode('this is some data'); ``` -#### textEncoder.encode([input]) +### textEncoder.encode([input]) * `input` {string} The text to encode. Defaults to an empty string. * Returns: {Uint8Array} -UTF-8 Encodes the `input` string and returns a `Uint8Array` containing the +UTF-8 encodes the `input` string and returns a `Uint8Array` containing the encoded bytes. +### textEncoder.encoding + +* {string} + +The encoding supported by the `TextEncoder` instance. Always set to `'utf-8'`. 
+ ## Deprecated APIs The following APIs have been deprecated and should no longer be used. Existing diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 22ae5c6c0db1ab..de4ebcf196284b 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -28,11 +28,12 @@ const { encodeUtf8String } = process.binding('buffer'); -const { - decode: _decode, - getConverter, - hasConverter -} = process.binding('icu'); +var Buffer; +function lazyBuffer() { + if (Buffer === undefined) + Buffer = require('buffer').Buffer; + return Buffer; +} const CONVERTER_FLAGS_FLUSH = 0x1; const CONVERTER_FLAGS_FATAL = 0x2; @@ -284,122 +285,14 @@ function getEncodingFromLabel(label) { return encodings.get(trimAsciiWhitespace(label.toLowerCase())); } -function hasTextDecoder(encoding = 'utf-8') { - if (typeof encoding !== 'string') - throw new errors.Error('ERR_INVALID_ARG_TYPE', 'encoding', 'string'); - return hasConverter(getEncodingFromLabel(encoding)); -} - -var Buffer; -function lazyBuffer() { - if (Buffer === undefined) - Buffer = require('buffer').Buffer; - return Buffer; -} - -class TextDecoder { - constructor(encoding = 'utf-8', options = {}) { - if (!warned) { - warned = true; - process.emitWarning(experimental, 'ExperimentalWarning'); - } - - encoding = `${encoding}`; - if (typeof options !== 'object') - throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object'); - - const enc = getEncodingFromLabel(encoding); - if (enc === undefined) - throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding); - - var flags = 0; - if (options !== null) { - flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0; - flags |= options.ignoreBOM ? 
CONVERTER_FLAGS_IGNORE_BOM : 0; - } - - const handle = getConverter(enc, flags); - if (handle === undefined) - throw new errors.Error('ERR_ENCODING_NOT_SUPPORTED', encoding); - - this[kHandle] = handle; - this[kFlags] = flags; - this[kEncoding] = enc; - } - - get encoding() { - if (this == null || this[kDecoder] !== true) - throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); - return this[kEncoding]; - } - - get fatal() { - if (this == null || this[kDecoder] !== true) - throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); - return (this[kFlags] & CONVERTER_FLAGS_FATAL) === CONVERTER_FLAGS_FATAL; - } - - get ignoreBOM() { - if (this == null || this[kDecoder] !== true) - throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); - return (this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM) === - CONVERTER_FLAGS_IGNORE_BOM; - } - - decode(input = empty, options = {}) { - if (this == null || this[kDecoder] !== true) - throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); - if (isArrayBuffer(input)) { - input = lazyBuffer().from(input); - } else if (!ArrayBuffer.isView(input)) { - throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'input', - ['ArrayBuffer', 'ArrayBufferView']); - } - if (typeof options !== 'object') { - throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object'); - } - - var flags = 0; - if (options !== null) - flags |= options.stream ? 
0 : CONVERTER_FLAGS_FLUSH; - - const ret = _decode(this[kHandle], input, flags); - if (typeof ret === 'number') { - const err = new errors.TypeError('ERR_ENCODING_INVALID_ENCODED_DATA', - this.encoding); - err.errno = ret; - throw err; - } - return ret.toString('ucs2'); - } - - [inspect](depth, opts) { - if (this == null || this[kDecoder] !== true) - throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); - if (typeof depth === 'number' && depth < 0) - return opts.stylize('[Object]', 'special'); - var ctor = getConstructorOf(this); - var obj = Object.create({ - constructor: ctor === null ? TextDecoder : ctor - }); - obj.encoding = this.encoding; - obj.fatal = this.fatal; - obj.ignoreBOM = this.ignoreBOM; - if (opts.showHidden) { - obj[kFlags] = this[kFlags]; - obj[kHandle] = this[kHandle]; - } - // Lazy to avoid circular dependency - return require('util').inspect(obj, opts); - } -} - class TextEncoder { constructor() { if (!warned) { warned = true; process.emitWarning(experimental, 'ExperimentalWarning'); } + + this[kEncoder] = true; } get encoding() { @@ -429,20 +322,8 @@ class TextEncoder { } } -Object.defineProperties( - TextDecoder.prototype, { - [kDecoder]: { enumerable: false, value: true, configurable: false }, - 'decode': { enumerable: true }, - 'encoding': { enumerable: true }, - 'fatal': { enumerable: true }, - 'ignoreBOM': { enumerable: true }, - [Symbol.toStringTag]: { - configurable: true, - value: 'TextDecoder' - } }); Object.defineProperties( TextEncoder.prototype, { - [kEncoder]: { enumerable: false, value: true, configurable: false }, 'encode': { enumerable: true }, 'encoding': { enumerable: true }, [Symbol.toStringTag]: { @@ -450,6 +331,237 @@ Object.defineProperties( value: 'TextEncoder' } }); +const { hasConverter, TextDecoder } = + process.binding('config').hasIntl ? 
+ makeTextDecoderICU() : + makeTextDecoderJS(); + +function hasTextDecoder(encoding = 'utf-8') { + if (typeof encoding !== 'string') + throw new errors.Error('ERR_INVALID_ARG_TYPE', 'encoding', 'string'); + return hasConverter(getEncodingFromLabel(encoding)); +} + +function makeTextDecoderICU() { + const { + decode: _decode, + getConverter, + hasConverter + } = process.binding('icu'); + + class TextDecoder { + constructor(encoding = 'utf-8', options = {}) { + if (!warned) { + warned = true; + process.emitWarning(experimental, 'ExperimentalWarning'); + } + + encoding = `${encoding}`; + if (typeof options !== 'object') + throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object'); + + const enc = getEncodingFromLabel(encoding); + if (enc === undefined) + throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding); + + var flags = 0; + if (options !== null) { + flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0; + flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0; + } + + const handle = getConverter(enc, flags); + if (handle === undefined) + throw new errors.Error('ERR_ENCODING_NOT_SUPPORTED', encoding); + + this[kDecoder] = true; + this[kHandle] = handle; + this[kFlags] = flags; + this[kEncoding] = enc; + } + + + decode(input = empty, options = {}) { + if (this == null || this[kDecoder] !== true) + throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); + if (isArrayBuffer(input)) { + input = lazyBuffer().from(input); + } else if (!ArrayBuffer.isView(input)) { + throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'input', + ['ArrayBuffer', 'ArrayBufferView']); + } + if (typeof options !== 'object') { + throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object'); + } + + var flags = 0; + if (options !== null) + flags |= options.stream ? 
0 : CONVERTER_FLAGS_FLUSH; + + const ret = _decode(this[kHandle], input, flags); + if (typeof ret === 'number') { + const err = new errors.TypeError('ERR_ENCODING_INVALID_ENCODED_DATA', + this.encoding); + err.errno = ret; + throw err; + } + return ret.toString('ucs2'); + } + } + + return { hasConverter, TextDecoder }; +} + +function makeTextDecoderJS() { + var StringDecoder; + function lazyStringDecoder() { + if (StringDecoder === undefined) + ({ StringDecoder } = require('string_decoder')); + return StringDecoder; + } + + const kBOMSeen = Symbol('BOM seen'); + + function hasConverter(encoding) { + return encoding === 'utf-8' || encoding === 'utf-16le'; + } + + class TextDecoder { + constructor(encoding = 'utf-8', options = {}) { + if (!warned) { + warned = true; + process.emitWarning(experimental, 'ExperimentalWarning'); + } + + encoding = `${encoding}`; + if (typeof options !== 'object') + throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object'); + + const enc = getEncodingFromLabel(encoding); + if (enc === undefined || !hasConverter(enc)) + throw new errors.RangeError('ERR_ENCODING_NOT_SUPPORTED', encoding); + + var flags = 0; + if (options !== null) { + if (options.fatal) { + throw new errors.TypeError('ERR_NO_ICU', '"fatal" option'); + } + flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0; + } + + this[kDecoder] = true; + // StringDecoder will normalize WHATWG encoding to Node.js encoding. 
+ this[kHandle] = new (lazyStringDecoder())(enc); + this[kFlags] = flags; + this[kEncoding] = enc; + this[kBOMSeen] = false; + } + + decode(input = empty, options = {}) { + if (this == null || this[kDecoder] !== true) + throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); + if (isArrayBuffer(input)) { + input = lazyBuffer().from(input); + } else if (ArrayBuffer.isView(input)) { + input = lazyBuffer().from(input.buffer, input.byteOffset, + input.byteLength); + } else { + throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'input', + ['ArrayBuffer', 'ArrayBufferView']); + } + if (typeof options !== 'object') { + throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'options', 'object'); + } + + if (this[kFlags] & CONVERTER_FLAGS_FLUSH) { + this[kBOMSeen] = false; + } + + if (options !== null && options.stream) { + this[kFlags] &= ~CONVERTER_FLAGS_FLUSH; + } else { + this[kFlags] |= CONVERTER_FLAGS_FLUSH; + } + + if (!this[kBOMSeen] && !(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) { + if (this[kEncoding] === 'utf-8') { + if (input.length >= 3 && + input[0] === 0xEF && input[1] === 0xBB && input[2] === 0xBF) { + input = input.slice(3); + } + } else if (this[kEncoding] === 'utf-16le') { + if (input.length >= 2 && input[0] === 0xFF && input[1] === 0xFE) { + input = input.slice(2); + } + } + this[kBOMSeen] = true; + } + + if (this[kFlags] & CONVERTER_FLAGS_FLUSH) { + return this[kHandle].end(input); + } + + return this[kHandle].write(input); + } + } + + return { hasConverter, TextDecoder }; +} + +// Mix in some shared properties. 
+{ + Object.defineProperties( + TextDecoder.prototype, + Object.getOwnPropertyDescriptors({ + get encoding() { + if (this == null || this[kDecoder] !== true) + throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); + return this[kEncoding]; + }, + + get fatal() { + if (this == null || this[kDecoder] !== true) + throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); + return (this[kFlags] & CONVERTER_FLAGS_FATAL) === CONVERTER_FLAGS_FATAL; + }, + + get ignoreBOM() { + if (this == null || this[kDecoder] !== true) + throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); + return (this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM) === + CONVERTER_FLAGS_IGNORE_BOM; + }, + + [inspect](depth, opts) { + if (this == null || this[kDecoder] !== true) + throw new errors.TypeError('ERR_INVALID_THIS', 'TextDecoder'); + if (typeof depth === 'number' && depth < 0) + return opts.stylize('[Object]', 'special'); + var ctor = getConstructorOf(this); + var obj = Object.create({ + constructor: ctor === null ? 
TextDecoder : ctor + }); + obj.encoding = this.encoding; + obj.fatal = this.fatal; + obj.ignoreBOM = this.ignoreBOM; + if (opts.showHidden) { + obj[kFlags] = this[kFlags]; + obj[kHandle] = this[kHandle]; + } + // Lazy to avoid circular dependency + return require('util').inspect(obj, opts); + } + })); + Object.defineProperties(TextDecoder.prototype, { + decode: { enumerable: true }, + [inspect]: { enumerable: false }, + [Symbol.toStringTag]: { + configurable: true, + value: 'TextDecoder' + } + }); +} + module.exports = { getEncodingFromLabel, hasTextDecoder, diff --git a/lib/internal/errors.js b/lib/internal/errors.js index 3ee34cf428939a..3b54dcea934f83 100644 --- a/lib/internal/errors.js +++ b/lib/internal/errors.js @@ -148,6 +148,7 @@ E('ERR_MISSING_ARGS', missingArgs); E('ERR_NAPI_CONS_FUNCTION', 'Constructor must be a function'); E('ERR_NAPI_CONS_PROTOTYPE_OBJECT', 'Constructor.prototype must be an object'); E('ERR_NO_CRYPTO', 'Node.js is not compiled with OpenSSL crypto support'); +E('ERR_NO_ICU', '%s is not supported on Node.js compiled without ICU'); E('ERR_PARSE_HISTORY_DATA', 'Could not parse history data in %s'); E('ERR_SOCKET_ALREADY_BOUND', 'Socket is already bound'); E('ERR_SOCKET_BAD_TYPE', diff --git a/test/parallel/test-whatwg-encoding.js b/test/parallel/test-whatwg-encoding-internals.js similarity index 56% rename from test/parallel/test-whatwg-encoding.js rename to test/parallel/test-whatwg-encoding-internals.js index c181df860ca149..d5bf07acb31803 100644 --- a/test/parallel/test-whatwg-encoding.js +++ b/test/parallel/test-whatwg-encoding-internals.js @@ -1,129 +1,12 @@ // Flags: --expose-internals 'use strict'; -const common = require('../common'); +require('../common'); const assert = require('assert'); -const { TextEncoder, TextDecoder } = require('util'); -const { customInspectSymbol: inspect } = require('internal/util'); const { getEncodingFromLabel } = require('internal/encoding'); -const encoded = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65, 
- 0x73, 0x74, 0xe2, 0x82, 0xac]); - -if (!common.hasIntl) { - common.skip('WHATWG Encoding tests because ICU is not present.'); -} - -// Make Sure TextDecoder and TextEncoder exist -assert(TextDecoder); -assert(TextEncoder); - -// Test TextEncoder -const enc = new TextEncoder(); -assert(enc); -const buf = enc.encode('\ufefftest€'); - -assert.strictEqual(Buffer.compare(buf, encoded), 0); - - -// Test TextDecoder, UTF-8, fatal: false, ignoreBOM: false -{ - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { - const dec = new TextDecoder(i); - const res = dec.decode(buf); - assert.strictEqual(res, 'test€'); - }); - - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { - const dec = new TextDecoder(i); - let res = ''; - res += dec.decode(buf.slice(0, 8), { stream: true }); - res += dec.decode(buf.slice(8)); - assert.strictEqual(res, 'test€'); - }); -} - -// Test TextDecoder, UTF-8, fatal: false, ignoreBOM: true -{ - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { - const dec = new TextDecoder(i, { ignoreBOM: true }); - const res = dec.decode(buf); - assert.strictEqual(res, '\ufefftest€'); - }); - - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { - const dec = new TextDecoder(i, { ignoreBOM: true }); - let res = ''; - res += dec.decode(buf.slice(0, 8), { stream: true }); - res += dec.decode(buf.slice(8)); - assert.strictEqual(res, '\ufefftest€'); - }); -} - -// Test TextDecoder, UTF-8, fatal: true, ignoreBOM: false -{ - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { - const dec = new TextDecoder(i, { fatal: true }); - assert.throws(() => dec.decode(buf.slice(0, 8)), - common.expectsError({ - code: 'ERR_ENCODING_INVALID_ENCODED_DATA', - type: TypeError, - message: - /^The encoded data was not valid for encoding utf-8$/ - })); - }); - - ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { - const dec = new TextDecoder(i, { fatal: true }); - assert.doesNotThrow(() => dec.decode(buf.slice(0, 8), { stream: true })); - 
assert.doesNotThrow(() => dec.decode(buf.slice(8))); - }); -} - -// Test TextDecoder, UTF-16le -{ - const dec = new TextDecoder('utf-16le'); - const res = dec.decode(Buffer.from('test€', 'utf-16le')); - assert.strictEqual(res, 'test€'); -} - -// Test TextDecoder, UTF-16be -{ - const dec = new TextDecoder('utf-16be'); - const res = dec.decode(Buffer.from([0x00, 0x74, 0x00, 0x65, 0x00, - 0x73, 0x00, 0x74, 0x20, 0xac])); - assert.strictEqual(res, 'test€'); -} - -{ - const fn = TextDecoder.prototype[inspect]; - fn.call(new TextDecoder(), Infinity, {}); - - [{}, [], true, 1, '', new TextEncoder()].forEach((i) => { - assert.throws(() => fn.call(i, Infinity, {}), - common.expectsError({ - code: 'ERR_INVALID_THIS', - message: 'Value of "this" must be of type TextDecoder' - })); - }); -} - -{ - const fn = TextEncoder.prototype[inspect]; - fn.call(new TextEncoder(), Infinity, {}); - - [{}, [], true, 1, '', new TextDecoder()].forEach((i) => { - assert.throws(() => fn.call(i, Infinity, {}), - common.expectsError({ - code: 'ERR_INVALID_THIS', - message: 'Value of "this" must be of type TextEncoder' - })); - }); -} - // Test Encoding Mappings { - const mappings = { 'utf-8': [ 'unicode-1-1-utf-8', diff --git a/test/parallel/test-whatwg-encoding-textdecoder.js b/test/parallel/test-whatwg-encoding-textdecoder.js new file mode 100644 index 00000000000000..440ccc38124ec6 --- /dev/null +++ b/test/parallel/test-whatwg-encoding-textdecoder.js @@ -0,0 +1,104 @@ +// Flags: --expose-internals +'use strict'; + +const common = require('../common'); +const assert = require('assert'); +const { TextDecoder, TextEncoder } = require('util'); +const { customInspectSymbol: inspect } = require('internal/util'); + +const buf = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65, + 0x73, 0x74, 0xe2, 0x82, 0xac]); + +// Make Sure TextDecoder exists +assert(TextDecoder); + +// Test TextDecoder, UTF-8, fatal: false, ignoreBOM: false +{ + ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + const dec = new 
TextDecoder(i); + const res = dec.decode(buf); + assert.strictEqual(res, 'test€'); + }); + + ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + const dec = new TextDecoder(i); + let res = ''; + res += dec.decode(buf.slice(0, 8), { stream: true }); + res += dec.decode(buf.slice(8)); + assert.strictEqual(res, 'test€'); + }); +} + +// Test TextDecoder, UTF-8, fatal: false, ignoreBOM: true +{ + ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + const dec = new TextDecoder(i, { ignoreBOM: true }); + const res = dec.decode(buf); + assert.strictEqual(res, '\ufefftest€'); + }); + + ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + const dec = new TextDecoder(i, { ignoreBOM: true }); + let res = ''; + res += dec.decode(buf.slice(0, 8), { stream: true }); + res += dec.decode(buf.slice(8)); + assert.strictEqual(res, '\ufefftest€'); + }); +} + +// Test TextDecoder, UTF-8, fatal: true, ignoreBOM: false +if (common.hasIntl) { + ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + const dec = new TextDecoder(i, { fatal: true }); + assert.throws(() => dec.decode(buf.slice(0, 8)), + common.expectsError({ + code: 'ERR_ENCODING_INVALID_ENCODED_DATA', + type: TypeError, + message: 'The encoded data was not valid for encoding utf-8' + })); + }); + + ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { + const dec = new TextDecoder(i, { fatal: true }); + assert.doesNotThrow(() => dec.decode(buf.slice(0, 8), { stream: true })); + assert.doesNotThrow(() => dec.decode(buf.slice(8))); + }); +} else { + assert.throws( + () => new TextDecoder('utf-8', { fatal: true }), + common.expectsError({ + code: 'ERR_NO_ICU', + type: TypeError, + message: '"fatal" option is not supported on Node.js compiled without ICU' + })); +} + +// Test TextDecoder, UTF-16le +{ + const dec = new TextDecoder('utf-16le'); + const res = dec.decode(Buffer.from('test€', 'utf-16le')); + assert.strictEqual(res, 'test€'); +} + +// Test TextDecoder, UTF-16be +if (common.hasIntl) { + const dec = 
new TextDecoder('utf-16be'); + const res = dec.decode(Buffer.from('test€', 'utf-16le').swap16()); + assert.strictEqual(res, 'test€'); +} + +{ + const fn = TextDecoder.prototype[inspect]; + assert.doesNotThrow(() => { + fn.call(new TextDecoder(), Infinity, {}); + }); + + [{}, [], true, 1, '', new TextEncoder()].forEach((i) => { + assert.throws(() => fn.call(i, Infinity, {}), + common.expectsError({ + code: 'ERR_INVALID_THIS', + type: TypeError, + message: 'Value of "this" must be of type TextDecoder' + })); + }); +} diff --git a/test/parallel/test-whatwg-encoding-textencoder.js b/test/parallel/test-whatwg-encoding-textencoder.js new file mode 100644 index 00000000000000..cf2769bb0ce577 --- /dev/null +++ b/test/parallel/test-whatwg-encoding-textencoder.js @@ -0,0 +1,36 @@ +// Flags: --expose-internals +'use strict'; + +const common = require('../common'); +const assert = require('assert'); +const { TextDecoder, TextEncoder } = require('util'); +const { customInspectSymbol: inspect } = require('internal/util'); + +const encoded = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65, + 0x73, 0x74, 0xe2, 0x82, 0xac]); + +// Make Sure TextEncoder exists +assert(TextEncoder); + +// Test TextEncoder +const enc = new TextEncoder(); +assert(enc); +const buf = enc.encode('\ufefftest€'); + +assert.strictEqual(Buffer.compare(buf, encoded), 0); + +{ + const fn = TextEncoder.prototype[inspect]; + assert.doesNotThrow(() => { + fn.call(new TextEncoder(), Infinity, {}); + }); + + [{}, [], true, 1, '', new TextDecoder()].forEach((i) => { + assert.throws(() => fn.call(i, Infinity, {}), + common.expectsError({ + code: 'ERR_INVALID_THIS', + type: TypeError, + message: 'Value of "this" must be of type TextEncoder' + })); + }); +}