Skip to content

Commit

Permalink
Add support for reading from Web Streams
Browse files Browse the repository at this point in the history
- Stream the Blob via Web Streams, instead of buffering the full content
- Update strtok3 to v7.0.0
  • Loading branch information
Borewit committed Jul 6, 2024
1 parent 37233b1 commit 569d467
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 100 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
fail-fast: false
matrix:
node-version:
- 22
- 20
- 18
steps:
Expand Down
29 changes: 0 additions & 29 deletions browser.d.ts

This file was deleted.

15 changes: 0 additions & 15 deletions browser.js

This file was deleted.

19 changes: 12 additions & 7 deletions core.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import type {Readable as ReadableStream} from 'node:stream';
/**
* Typings for primary entry point, Node.js specific typings can be found in index.d.ts
*/

import type {Readable as NodeReadableStream} from 'node:stream';
import type {ReadableStream as WebReadableStream} from 'node:stream/web';
import type {ITokenizer} from 'strtok3';

export type FileExtension =
Expand Down Expand Up @@ -318,7 +323,7 @@ export type FileTypeResult = {
readonly mime: MimeType;
};

export type ReadableStreamWithFileType = ReadableStream & {
export type ReadableStreamWithFileType = NodeReadableStream & {
readonly fileType?: FileTypeResult;
};

Expand All @@ -339,10 +344,10 @@ Detect the file type of a Node.js [readable stream](https://nodejs.org/api/strea
The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
@param stream - A readable stream representing file data.
@param stream - A Node.js Readable stream or Web API Readable Stream representing file data. The Web Readable stream **must be a byte stream**.
@returns The detected file type, or `undefined` when there is no match.
*/
export function fileTypeFromStream(stream: ReadableStream): Promise<FileTypeResult | undefined>;
export function fileTypeFromStream(stream: NodeReadableStream | WebReadableStream): Promise<FileTypeResult | undefined>;

/**
Detect the file type from an [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer) source.
Expand Down Expand Up @@ -420,7 +425,7 @@ if (stream2.fileType?.mime === 'image/jpeg') {
}
```
*/
export function fileTypeStream(readableStream: ReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;
export function fileTypeStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;

/**
Detect the file type of a [`Blob`](https://nodejs.org/api/buffer.html#class-blob) or [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File).
Expand Down Expand Up @@ -511,7 +516,7 @@ export declare class FileTypeParser {
/**
Works the same way as {@link fileTypeFromStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromStream(stream: ReadableStream): Promise<FileTypeResult | undefined>;
fromStream(stream: NodeReadableStream | WebReadableStream): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeFromTokenizer}, additionally taking into account custom detectors (if any were provided to the constructor).
Expand All @@ -526,5 +531,5 @@ export declare class FileTypeParser {
/**
Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
toDetectionStream(readableStream: ReadableStream, options?: StreamOptions): Promise<FileTypeResult | undefined>;
toDetectionStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<FileTypeResult | undefined>;
}
16 changes: 10 additions & 6 deletions core.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
/**
* Primary entry point, Node.js specific entry point is index.js
*/

import {Buffer} from 'node:buffer';
import {ReadableStream as WebReadableStream} from 'node:stream/web';
import * as Token from 'token-types';
import * as strtok3 from 'strtok3/core';
import {
Expand Down Expand Up @@ -88,12 +93,11 @@ export class FileTypeParser {
}

async fromBlob(blob) {
const buffer = await blob.arrayBuffer();
return this.fromBuffer(new Uint8Array(buffer));
return this.fromStream(blob.stream());
}

async fromStream(stream) {
const tokenizer = await strtok3.fromStream(stream);
const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream) : strtok3.fromStream(stream));
try {
return await this.fromTokenizer(tokenizer);
} finally {
Expand Down Expand Up @@ -576,7 +580,7 @@ export class FileTypeParser {
) {
// They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
// For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
const brandMajor = this.buffer.toString('binary', 8, 12).replace('\0', ' ').trim();
const brandMajor = this.buffer.toString('latin1', 8, 12).replace('\0', ' ').trim();
switch (brandMajor) {
case 'avif':
case 'avis':
Expand Down Expand Up @@ -1059,7 +1063,7 @@ export class FileTypeParser {
}

if (this.checkString('AC')) {
const version = this.buffer.toString('binary', 2, 6);
const version = this.buffer.toString('latin1', 2, 6);
if (version.match('^d*') && version >= 1000 && version <= 1050) {
return {
ext: 'dwg',
Expand Down Expand Up @@ -1126,7 +1130,7 @@ export class FileTypeParser {
async function readChunkHeader() {
return {
length: await tokenizer.readToken(Token.INT32_BE),
type: await tokenizer.readToken(new Token.StringType(4, 'binary')),
type: await tokenizer.readToken(new Token.StringType(4, 'latin1')),
};
}

Expand Down
18 changes: 11 additions & 7 deletions index.d.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import type {FileTypeResult} from './core.js';

/**
Detect the file type of a file path.
* Typings for Node.js specific entry point
*/

The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
import type {FileTypeResult} from './core.js';

@param path - The file path to parse.
@returns The detected file type and MIME type or `undefined` when there is no match.
*/
/**
* Detect the file type of a file path.
*
* The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.
*
* @param path - The file path to parse.
* @returns The detected file type and MIME type or `undefined` when there is no match.
*/
export function fileTypeFromFile(path: string): Promise<FileTypeResult | undefined>;

export * from './core.js';
4 changes: 4 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/**
* Node.js specific entry point
*/

import * as strtok3 from 'strtok3';
import {FileTypeParser} from './core.js';

Expand Down
2 changes: 1 addition & 1 deletion index.test-d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import {createReadStream} from 'node:fs';
import {expectType} from 'tsd';
import {
type FileTypeResult as FileTypeResultBrowser,
} from './browser.js';
} from './core.js';
import {
fileTypeFromBlob,
fileTypeFromBuffer,
Expand Down
9 changes: 3 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"exports": {
".": {
"node": "./index.js",
"default": "./browser.js"
"default": "./core.js"
},
"./core": "./core.js"
},
Expand All @@ -28,8 +28,6 @@
"files": [
"index.js",
"index.d.ts",
"browser.js",
"browser.d.ts",
"core.js",
"core.d.ts",
"supported.js",
Expand Down Expand Up @@ -210,9 +208,8 @@
"fbx"
],
"dependencies": {
"readable-web-to-node-stream": "^3.0.2",
"strtok3": "^7.0.0",
"token-types": "^5.0.1"
"strtok3": "^7.1.0",
"token-types": "^6.0.0"
},
"devDependencies": {
"@tokenizer/token": "^0.3.0",
Expand Down
4 changes: 3 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ The file path to parse.

### fileTypeFromStream(stream)

Detect the file type of a Node.js [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable).
Detect the file type of a [Node.js readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) or a [Web API ReadableStream](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream).

The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.

Expand All @@ -168,6 +168,8 @@ A readable stream representing file data.

Detect the file type of a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob).

It will **stream** the underlying Blob, which requires a [ReadableStreamBYOBReader](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStreamBYOBReader), which in turn **requires Node.js ≥ 20**.

The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.

Returns a `Promise` for an object with the detected file type:
Expand Down
71 changes: 43 additions & 28 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ const missingTests = new Set([
'mpc',
]);

const [nodeMajorVersion] = process.versions.node.split('.').map(Number);
const nodeVersionSupportingByeBlobStream = 20;

const types = [...supportedExtensions].filter(ext => !missingTests.has(ext));

// Define an entry here only if the fixture has a different
Expand Down Expand Up @@ -351,6 +354,10 @@ async function testStream(t, ext, name) {
t.true(bufferA.equals(bufferB));
}

test('Test suite must be able to detect Node.js major version', t => {
t.is(typeof nodeMajorVersion, 'number', 'Detected Node.js major version should be a number');
});

let i = 0;
for (const type of types) {
if (Object.prototype.hasOwnProperty.call(names, type)) {
Expand All @@ -360,7 +367,11 @@ for (const type of types) {

_test(`${name}.${type} ${i++} .fileTypeFromFile() method - same fileType`, testFromFile, type, name);
_test(`${name}.${type} ${i++} .fileTypeFromBuffer() method - same fileType`, testFromBuffer, type, name);
_test(`${name}.${type} ${i++} .fileTypeFromBlob() method - same fileType`, testFromBlob, type, name);
if (nodeMajorVersion >= nodeVersionSupportingByeBlobStream) {
// Detecting from a Blob streams it through a BYOB ReadableStream reader, which requires Node.js ≥ 20
_test(`${name}.${type} ${i++} .fileTypeFromBlob() method - same fileType`, testFromBlob, type, name);
}

_test(`${name}.${type} ${i++} .fileTypeFromStream() method - same fileType`, testFileFromStream, type, name);
test(`${name}.${type} ${i++} .fileTypeStream() - identical streams`, testStream, type, name);
}
Expand Down Expand Up @@ -684,41 +695,45 @@ const tokenizerPositionChanger = tokenizer => {
tokenizer.readBuffer(buffer, {length: 1, mayBeLess: true});
};

test('fileTypeFromBlob should detect custom file type "unicorn" using custom detectors', async t => {
// Set up the "unicorn" file content
const header = 'UNICORN FILE\n';
const blob = new Blob([header]);
if (nodeMajorVersion >= nodeVersionSupportingByeBlobStream) {
// Detecting from a Blob streams it through a BYOB ReadableStream reader, which requires Node.js ≥ 20

const customDetectors = [unicornDetector];
const parser = new FileTypeParser({customDetectors});
test('fileTypeFromBlob should detect custom file type "unicorn" using custom detectors', async t => {
// Set up the "unicorn" file content
const header = 'UNICORN FILE\n';
const blob = new Blob([header]);

const result = await parser.fromBlob(blob);
t.deepEqual(result, {ext: 'unicorn', mime: 'application/unicorn'});
});
const customDetectors = [unicornDetector];
const parser = new FileTypeParser({customDetectors});

test('fileTypeFromBlob should keep detecting default file types when no custom detector matches', async t => {
const file = path.join(__dirname, 'fixture', 'fixture.png');
const chunk = fs.readFileSync(file);
const blob = new Blob([chunk]);
const result = await parser.fromBlob(blob);
t.deepEqual(result, {ext: 'unicorn', mime: 'application/unicorn'});
});

const customDetectors = [unicornDetector];
const parser = new FileTypeParser({customDetectors});
test('fileTypeFromBlob should keep detecting default file types when no custom detector matches', async t => {
const file = path.join(__dirname, 'fixture', 'fixture.png');
const chunk = fs.readFileSync(file);
const blob = new Blob([chunk]);

const result = await parser.fromBlob(blob);
t.deepEqual(result, {ext: 'png', mime: 'image/png'});
});
const customDetectors = [unicornDetector];
const parser = new FileTypeParser({customDetectors});

test('fileTypeFromBlob should allow overriding default file type detectors', async t => {
const file = path.join(__dirname, 'fixture', 'fixture.png');
const chunk = fs.readFileSync(file);
const blob = new Blob([chunk]);
const result = await parser.fromBlob(blob);
t.deepEqual(result, {ext: 'png', mime: 'image/png'});
});

const customDetectors = [mockPngDetector];
const parser = new FileTypeParser({customDetectors});
test('fileTypeFromBlob should allow overriding default file type detectors', async t => {
const file = path.join(__dirname, 'fixture', 'fixture.png');
const chunk = fs.readFileSync(file);
const blob = new Blob([chunk]);

const result = await parser.fromBlob(blob);
t.deepEqual(result, {ext: 'mockPng', mime: 'image/mockPng'});
});
const customDetectors = [mockPngDetector];
const parser = new FileTypeParser({customDetectors});

const result = await parser.fromBlob(blob);
t.deepEqual(result, {ext: 'mockPng', mime: 'image/mockPng'});
});
}

test('fileTypeFromBuffer should detect custom file type "unicorn" using custom detectors', async t => {
const header = 'UNICORN FILE\n';
Expand Down

0 comments on commit 569d467

Please sign in to comment.