Skip to content

Commit

Permalink
Implement ignoreBOM option of UTF8Decoder in text_encoding (#3040)
Browse files: browse the repository at this point in the history
  • Loading branch information
7k8m authored and ry committed Oct 2, 2019
1 parent 75eeac0 commit a646c2a
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 5 deletions.
2 changes: 1 addition & 1 deletion js/lib.deno_runtime.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2372,7 +2372,7 @@ declare namespace textEncoding {
}
/**
 * Options interface declared next to `TextDecoder` in the `textEncoding`
 * namespace declaration (presumably the constructor's option bag; the
 * constructor signature is not visible in this hunk).
 */
export interface TextDecoderOptions {
fatal?: boolean;
// NOTE(review): `ignoreBOM` appears twice below, once typed as the literal
// `false` and once as `boolean`. This looks like the removed and added sides
// of a diff rendered together; confirm that only the `boolean` form is live.
ignoreBOM?: false;
ignoreBOM?: boolean;
}
export class TextDecoder {
private _encoding;
Expand Down
37 changes: 33 additions & 4 deletions js/text_encoding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,13 @@ class UTF8Decoder implements Decoder {
private _bytesSeen = 0;
private _bytesNeeded = 0;
private _fatal: boolean;
private _ignoreBOM: boolean;
private _lowerBoundary = 0x80;
private _upperBoundary = 0xbf;

/**
 * Captures the decoder flags from `options`.
 *
 * Both `fatal` and `ignoreBOM` are optional; an absent (or falsy) value is
 * stored as `false`.
 */
constructor(options: DecoderOptions) {
  const { fatal, ignoreBOM } = options;
  this._fatal = fatal || false;
  this._ignoreBOM = ignoreBOM || false;
}

handler(stream: Stream, byte: number): number | null {
Expand All @@ -76,6 +78,26 @@ class UTF8Decoder implements Decoder {
return FINISHED;
}

if (this._ignoreBOM) {
if (
(this._bytesSeen === 0 && byte !== 0xef) ||
(this._bytesSeen === 1 && byte !== 0xbb)
) {
this._ignoreBOM = false;
}

if (this._bytesSeen === 2) {
this._ignoreBOM = false;
if (byte === 0xbf) {
//Ignore BOM
this._codePoint = 0;
this._bytesNeeded = 0;
this._bytesSeen = 0;
return CONTINUE;
}
}
}

if (this._bytesNeeded === 0) {
if (isASCIIByte(byte)) {
// Single byte code point
Expand Down Expand Up @@ -225,6 +247,7 @@ export function btoa(s: string): string {

/**
 * Options handed to the internal decoder constructors (`UTF8Decoder` and
 * `SingleByteDecoder` both take a `DecoderOptions` argument).
 */
interface DecoderOptions {
// Stored by the decoders as their `_fatal` flag; treated as `false` when omitted.
fatal?: boolean;
// Stored by `UTF8Decoder` as `_ignoreBOM`; `SingleByteDecoder` throws a
// `TypeError` when this is truthy.
ignoreBOM?: boolean;
}

interface Decoder {
Expand All @@ -240,6 +263,9 @@ class SingleByteDecoder implements Decoder {
private _fatal: boolean;

/**
 * Builds a single-byte decoder around the given lookup `index`.
 *
 * @param index Table stored as-is on the instance as `_index`.
 * @param options Decoder flags; `fatal` defaults to `false` when omitted.
 * @throws {TypeError} If `options.ignoreBOM` is truthy. The error message
 *   states that ignoring the BOM is only available with utf-8.
 */
constructor(index: number[], options: DecoderOptions) {
  // Reject the unsupported flag before touching any instance state.
  const bomFlag = options.ignoreBOM;
  if (bomFlag) {
    throw new TypeError("Ignoring the BOM is available only with utf-8.");
  }
  this._index = index;
  this._fatal = options.fatal || false;
}
Expand Down Expand Up @@ -367,7 +393,7 @@ export interface TextDecodeOptions {

/**
 * Options accepted as the second argument of the `TextDecoder` constructor.
 */
export interface TextDecoderOptions {
// When truthy, the `TextDecoder` constructor sets its `fatal` field to `true`.
fatal?: boolean;
// NOTE(review): `ignoreBOM` appears twice below, once typed as the literal
// `false` and once as `boolean`. This looks like the removed and added sides
// of a diff rendered together; confirm that only the `boolean` form is live.
ignoreBOM?: false;
ignoreBOM?: boolean;
}

type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer;
Expand All @@ -387,11 +413,11 @@ export class TextDecoder {
/** Returns `true` if error mode is "fatal", and `false` otherwise. */
readonly fatal: boolean = false;
/** Returns `true` if ignore BOM flag is set, and `false` otherwise. */
readonly ignoreBOM = false;
readonly ignoreBOM: boolean = false;

constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) {
if (options.ignoreBOM) {
throw new TypeError("Ignoring the BOM not supported.");
this.ignoreBOM = true;
}
if (options.fatal) {
this.fatal = true;
Expand Down Expand Up @@ -435,7 +461,10 @@ export class TextDecoder {
bytes = new Uint8Array(0);
}

const decoder = decoders.get(this._encoding)!({ fatal: this.fatal });
const decoder = decoders.get(this._encoding)!({
fatal: this.fatal,
ignoreBOM: this.ignoreBOM
});
const inputStream = new Stream(bytes);
const output: number[] = [];

Expand Down
26 changes: 26 additions & 0 deletions js/text_encoding_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,32 @@ test(function textDecoder2(): void {
assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽");
});

test(function textDecoderIgnoreBOM(): void {
  // Input: the three bytes EF BB BF (UTF-8 BOM) followed by four 4-byte
  // sequences. The expected output contains only the four characters, so
  // this test pins that the leading BOM bytes produce no output.
  //
  // NOTE(review): per the WHATWG Encoding Standard, `ignoreBOM: true` is
  // supposed to keep the BOM in the decoded output, whereas this test expects
  // it to be dropped. Confirm the intended semantics.
  // prettier-ignore
  const bomPrefixed = Uint8Array.of(
    0xef, 0xbb, 0xbf,
    0xf0, 0x9d, 0x93, 0xbd,
    0xf0, 0x9d, 0x93, 0xae,
    0xf0, 0x9d, 0x94, 0x81,
    0xf0, 0x9d, 0x93, 0xbd
  );
  const decoded = new TextDecoder("utf-8", { ignoreBOM: true }).decode(
    bomPrefixed
  );
  assertEquals(decoded, "𝓽𝓮𝔁𝓽");
});

test(function textDecoderNotBOM(): void {
  // Input starts with EF BB 89: the first two bytes match the UTF-8 BOM but
  // the third does not (0x89 instead of 0xbf). The expected output keeps the
  // character decoded from that prefix, followed by the same four characters.
  // prettier-ignore
  const almostBom = Uint8Array.of(
    0xef, 0xbb, 0x89,
    0xf0, 0x9d, 0x93, 0xbd,
    0xf0, 0x9d, 0x93, 0xae,
    0xf0, 0x9d, 0x94, 0x81,
    0xf0, 0x9d, 0x93, 0xbd
  );
  const decoded = new TextDecoder("utf-8", { ignoreBOM: true }).decode(
    almostBom
  );
  assertEquals(decoded, "ﻉ𝓽𝓮𝔁𝓽");
});

test(function textDecoderASCII(): void {
const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
const decoder = new TextDecoder("ascii");
Expand Down

0 comments on commit a646c2a

Please sign in to comment.