diff --git a/js/lib.deno_runtime.d.ts b/js/lib.deno_runtime.d.ts index 8eb46b410ee47f..1e220a29ddc7bc 100644 --- a/js/lib.deno_runtime.d.ts +++ b/js/lib.deno_runtime.d.ts @@ -2372,7 +2372,7 @@ declare namespace textEncoding { } export interface TextDecoderOptions { fatal?: boolean; - ignoreBOM?: false; + ignoreBOM?: boolean; } export class TextDecoder { private _encoding; diff --git a/js/text_encoding.ts b/js/text_encoding.ts index a956cd52c3e50a..8386ff8b0ae060 100644 --- a/js/text_encoding.ts +++ b/js/text_encoding.ts @@ -59,11 +59,13 @@ class UTF8Decoder implements Decoder { private _bytesSeen = 0; private _bytesNeeded = 0; private _fatal: boolean; + private _ignoreBOM: boolean; private _lowerBoundary = 0x80; private _upperBoundary = 0xbf; constructor(options: DecoderOptions) { this._fatal = options.fatal || false; + this._ignoreBOM = options.ignoreBOM || false; } handler(stream: Stream, byte: number): number | null { @@ -76,6 +78,26 @@ class UTF8Decoder implements Decoder { return FINISHED; } + if (this._ignoreBOM) { + if ( + (this._bytesSeen === 0 && byte !== 0xef) || + (this._bytesSeen === 1 && byte !== 0xbb) + ) { + this._ignoreBOM = false; + } + + if (this._bytesSeen === 2) { + this._ignoreBOM = false; + if (byte === 0xbf) { + //Ignore BOM + this._codePoint = 0; + this._bytesNeeded = 0; + this._bytesSeen = 0; + return CONTINUE; + } + } + } + if (this._bytesNeeded === 0) { if (isASCIIByte(byte)) { // Single byte code point @@ -225,6 +247,7 @@ export function btoa(s: string): string { interface DecoderOptions { fatal?: boolean; + ignoreBOM?: boolean; } interface Decoder { @@ -240,6 +263,9 @@ class SingleByteDecoder implements Decoder { private _fatal: boolean; constructor(index: number[], options: DecoderOptions) { + if (options.ignoreBOM) { + throw new TypeError("Ignoring the BOM is available only with utf-8."); + } this._fatal = options.fatal || false; this._index = index; } @@ -367,7 +393,7 @@ export interface TextDecodeOptions { export interface TextDecoderOptions { fatal?: boolean; - ignoreBOM?: false; + ignoreBOM?: boolean; } type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer; @@ -387,11 +413,11 @@ export class TextDecoder { /** Returns `true` if error mode is "fatal", and `false` otherwise. */ readonly fatal: boolean = false; /** Returns `true` if ignore BOM flag is set, and `false` otherwise. */ - readonly ignoreBOM = false; + readonly ignoreBOM: boolean = false; constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) { if (options.ignoreBOM) { - throw new TypeError("Ignoring the BOM not supported."); + this.ignoreBOM = true; } if (options.fatal) { this.fatal = true; @@ -435,7 +461,10 @@ export class TextDecoder { bytes = new Uint8Array(0); } - const decoder = decoders.get(this._encoding)!({ fatal: this.fatal }); + const decoder = decoders.get(this._encoding)!({ + fatal: this.fatal, + ignoreBOM: this.ignoreBOM + }); const inputStream = new Stream(bytes); const output: number[] = []; diff --git a/js/text_encoding_test.ts b/js/text_encoding_test.ts index 7274247491bad3..aaa9e6b9dca0fc 100644 --- a/js/text_encoding_test.ts +++ b/js/text_encoding_test.ts @@ -74,6 +74,32 @@ test(function textDecoder2(): void { assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽"); }); +test(function textDecoderIgnoreBOM(): void { + // prettier-ignore + const fixture = new Uint8Array([ + 0xef, 0xbb, 0xbf, + 0xf0, 0x9d, 0x93, 0xbd, + 0xf0, 0x9d, 0x93, 0xae, + 0xf0, 0x9d, 0x94, 0x81, + 0xf0, 0x9d, 0x93, 0xbd + ]); + const decoder = new TextDecoder("utf-8", { ignoreBOM: true }); + assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽"); +}); + +test(function textDecoderNotBOM(): void { + // prettier-ignore + const fixture = new Uint8Array([ + 0xef, 0xbb, 0x89, + 0xf0, 0x9d, 0x93, 0xbd, + 0xf0, 0x9d, 0x93, 0xae, + 0xf0, 0x9d, 0x94, 0x81, + 0xf0, 0x9d, 0x93, 0xbd + ]); + const decoder = new TextDecoder("utf-8", { ignoreBOM: true }); + assertEquals(decoder.decode(fixture), "ﻉ𝓽𝓮𝔁𝓽"); +}); + test(function textDecoderASCII(): void { const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]); const decoder = new TextDecoder("ascii");