From 569d4676a8f6a3349e2c8536ab47a64e9f7a417e Mon Sep 17 00:00:00 2001 From: Borewit Date: Thu, 4 Jul 2024 23:28:02 +0200 Subject: [PATCH] Add support for reading from Web Streams - Stream Blob via a Web Stream, instead of buffering the full content - Update strtok3 to v7.1.0 --- .github/workflows/main.yml | 1 + browser.d.ts | 29 ---------------- browser.js | 15 -------- core.d.ts | 19 ++++++---- core.js | 16 +++++---- index.d.ts | 18 ++++++---- index.js | 4 +++ index.test-d.ts | 2 +- package.json | 9 ++--- readme.md | 4 ++- test.js | 71 +++++++++++++++++++++++--------------- 11 files changed, 88 insertions(+), 100 deletions(-) delete mode 100644 browser.d.ts delete mode 100644 browser.js diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 346585cf..6e981506 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -10,6 +10,7 @@ jobs: fail-fast: false matrix: node-version: + - 22 - 20 - 18 steps: diff --git a/browser.d.ts b/browser.d.ts deleted file mode 100644 index 83c015f9..00000000 --- a/browser.d.ts +++ /dev/null @@ -1,29 +0,0 @@ -import type {FileTypeResult} from './core.js'; - -/** -Detect the file type of a [`ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream). 
- -@example -``` -import {fileTypeFromStream} from 'file-type'; - -const url = 'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg'; - -const response = await fetch(url); -const fileType = await fileTypeFromStream(response.body); - -console.log(fileType); -//=> {ext: 'jpg', mime: 'image/jpeg'} -``` -*/ -export declare function fileTypeFromStream(stream: ReadableStream): Promise; - -export { - fileTypeFromBuffer, - fileTypeFromBlob, - supportedExtensions, - supportedMimeTypes, - type FileTypeResult, - type FileExtension, - type MimeType, -} from './core.js'; diff --git a/browser.js b/browser.js deleted file mode 100644 index 54cdb06e..00000000 --- a/browser.js +++ /dev/null @@ -1,15 +0,0 @@ -import {ReadableWebToNodeStream} from 'readable-web-to-node-stream'; -import {fileTypeFromStream as coreFileTypeFromStream} from './core.js'; - -export async function fileTypeFromStream(stream) { - const readableWebToNodeStream = new ReadableWebToNodeStream(stream); - const fileType = await coreFileTypeFromStream(readableWebToNodeStream); - await readableWebToNodeStream.close(); - return fileType; -} - -export { - fileTypeFromTokenizer, - fileTypeFromBuffer, - fileTypeStream, -} from './core.js'; diff --git a/core.d.ts b/core.d.ts index 3e8375df..6a84473e 100644 --- a/core.d.ts +++ b/core.d.ts @@ -1,4 +1,9 @@ -import type {Readable as ReadableStream} from 'node:stream'; +/** + * Typings for primary entry point, Node.js specific typings can be found in index.d.ts + */ + +import type {Readable as NodeReadableStream} from 'node:stream'; +import type {ReadableStream as WebReadableStream} from 'node:stream/web'; import type {ITokenizer} from 'strtok3'; export type FileExtension = @@ -318,7 +323,7 @@ export type FileTypeResult = { readonly mime: MimeType; }; -export type ReadableStreamWithFileType = ReadableStream & { +export type ReadableStreamWithFileType = NodeReadableStream & { readonly fileType?: FileTypeResult; }; @@ -339,10 +344,10 @@ Detect the file type of a Node.js 
[readable stream](https://nodejs.org/api/strea The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer. -@param stream - A readable stream representing file data. +@param stream - A Node.js Readable stream or Web API Readable Stream representing file data. The Web Readable stream **must be a byte stream**. @returns The detected file type, or `undefined` when there is no match. */ -export function fileTypeFromStream(stream: ReadableStream): Promise; +export function fileTypeFromStream(stream: NodeReadableStream | WebReadableStream): Promise; /** Detect the file type from an [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer) source. @@ -420,7 +425,7 @@ if (stream2.fileType?.mime === 'image/jpeg') { } ``` */ -export function fileTypeStream(readableStream: ReadableStream, options?: StreamOptions): Promise; +export function fileTypeStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise; /** Detect the file type of a [`Blob`](https://nodejs.org/api/buffer.html#class-blob) or [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File). @@ -511,7 +516,7 @@ export declare class FileTypeParser { /** Works the same way as {@link fileTypeFromStream}, additionally taking into account custom detectors (if any were provided to the constructor). */ - fromStream(stream: ReadableStream): Promise; + fromStream(stream: NodeReadableStream | WebReadableStream): Promise; /** Works the same way as {@link fileTypeFromTokenizer}, additionally taking into account custom detectors (if any were provided to the constructor). @@ -526,5 +531,5 @@ export declare class FileTypeParser { /** Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor). 
*/ - toDetectionStream(readableStream: ReadableStream, options?: StreamOptions): Promise; + toDetectionStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise; } diff --git a/core.js b/core.js index 34f4215a..d43a7fba 100644 --- a/core.js +++ b/core.js @@ -1,4 +1,9 @@ +/** + * Primary entry point, Node.js specific entry point is index.js + */ + import {Buffer} from 'node:buffer'; +import {ReadableStream as WebReadableStream} from 'node:stream/web'; import * as Token from 'token-types'; import * as strtok3 from 'strtok3/core'; import { @@ -88,12 +93,11 @@ export class FileTypeParser { } async fromBlob(blob) { - const buffer = await blob.arrayBuffer(); - return this.fromBuffer(new Uint8Array(buffer)); + return this.fromStream(blob.stream()); } async fromStream(stream) { - const tokenizer = await strtok3.fromStream(stream); + const tokenizer = await (stream instanceof WebReadableStream ? strtok3.fromWebStream(stream) : strtok3.fromStream(stream)); try { return await this.fromTokenizer(tokenizer); } finally { @@ -576,7 +580,7 @@ export class FileTypeParser { ) { // They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect. // For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension. 
- const brandMajor = this.buffer.toString('binary', 8, 12).replace('\0', ' ').trim(); + const brandMajor = this.buffer.toString('latin1', 8, 12).replace('\0', ' ').trim(); switch (brandMajor) { case 'avif': case 'avis': @@ -1059,7 +1063,7 @@ export class FileTypeParser { } if (this.checkString('AC')) { - const version = this.buffer.toString('binary', 2, 6); + const version = this.buffer.toString('latin1', 2, 6); if (version.match('^d*') && version >= 1000 && version <= 1050) { return { ext: 'dwg', @@ -1126,7 +1130,7 @@ export class FileTypeParser { async function readChunkHeader() { return { length: await tokenizer.readToken(Token.INT32_BE), - type: await tokenizer.readToken(new Token.StringType(4, 'binary')), + type: await tokenizer.readToken(new Token.StringType(4, 'latin1')), }; } diff --git a/index.d.ts b/index.d.ts index e2b91ad4..799eaa65 100644 --- a/index.d.ts +++ b/index.d.ts @@ -1,13 +1,17 @@ -import type {FileTypeResult} from './core.js'; - /** -Detect the file type of a file path. + * Typings for Node.js specific entry point + */ -The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer. +import type {FileTypeResult} from './core.js'; -@param path - The file path to parse. -@returns The detected file type and MIME type or `undefined` when there is no match. -*/ +/** + * Detect the file type of a file path. + * + * The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer. + * + * @param path + * @returns The detected file type and MIME type or `undefined` when there is no match. 
+ */ export function fileTypeFromFile(path: string): Promise; export * from './core.js'; diff --git a/index.js b/index.js index 24bacf9d..34f8a6b1 100644 --- a/index.js +++ b/index.js @@ -1,3 +1,7 @@ +/** + * Node.js specific entry point + */ + import * as strtok3 from 'strtok3'; import {FileTypeParser} from './core.js'; diff --git a/index.test-d.ts b/index.test-d.ts index 84225eb4..9563fc28 100644 --- a/index.test-d.ts +++ b/index.test-d.ts @@ -3,7 +3,7 @@ import {createReadStream} from 'node:fs'; import {expectType} from 'tsd'; import { type FileTypeResult as FileTypeResultBrowser, -} from './browser.js'; +} from './core.js'; import { fileTypeFromBlob, fileTypeFromBuffer, diff --git a/package.json b/package.json index 78de61f7..04e4185a 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,7 @@ "exports": { ".": { "node": "./index.js", - "default": "./browser.js" + "default": "./core.js" }, "./core": "./core.js" }, @@ -28,8 +28,6 @@ "files": [ "index.js", "index.d.ts", - "browser.js", - "browser.d.ts", "core.js", "core.d.ts", "supported.js", @@ -210,9 +208,8 @@ "fbx" ], "dependencies": { - "readable-web-to-node-stream": "^3.0.2", - "strtok3": "^7.0.0", - "token-types": "^5.0.1" + "strtok3": "^7.1.0", + "token-types": "^6.0.0" }, "devDependencies": { "@tokenizer/token": "^0.3.0", diff --git a/readme.md b/readme.md index c05f40bd..577c5848 100644 --- a/readme.md +++ b/readme.md @@ -147,7 +147,7 @@ The file path to parse. ### fileTypeFromStream(stream) -Detect the file type of a Node.js [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable). +Detect the file type of a [Node.js readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) or a [Web API ReadableStream](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream). The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer. 
@@ -168,6 +168,8 @@ A readable stream representing file data. Detect the file type of a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob). +It will **stream** the underlying Blob, which requires a [ReadableStreamBYOBReader](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStreamBYOBReader), and therefore **requires Node.js ≥ 20**. + The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer. Returns a `Promise` for an object with the detected file type: diff --git a/test.js b/test.js index db0e3a94..4a078db2 100644 --- a/test.js +++ b/test.js @@ -25,6 +25,9 @@ const missingTests = new Set([ 'mpc', ]); +const [nodeMajorVersion] = process.versions.node.split('.').map(Number); +const nodeVersionSupportingByeBlobStream = 20; + const types = [...supportedExtensions].filter(ext => !missingTests.has(ext)); // Define an entry here only if the fixture has a different @@ -351,6 +354,10 @@ async function testStream(t, ext, name) { t.true(bufferA.equals(bufferB)); } +test('Test suite must be able to detect Node.js major version', t => { + t.is(typeof nodeMajorVersion, 'number', 'Detected Node.js major version should be a number'); +}); + let i = 0; for (const type of types) { if (Object.prototype.hasOwnProperty.call(names, type)) { @@ -360,7 +367,11 @@ for (const type of types) { _test(`${name}.${type} ${i++} .fileTypeFromFile() method - same fileType`, testFromFile, type, name); _test(`${name}.${type} ${i++} .fileTypeFromBuffer() method - same fileType`, testFromBuffer, type, name); - _test(`${name}.${type} ${i++} .fileTypeFromBlob() method - same fileType`, testFromBlob, type, name); + if (nodeMajorVersion >= nodeVersionSupportingByeBlobStream) { + // Blob requires to stream to BYOB ReadableStream, requiring Node.js ≥ 20 + _test(`${name}.${type} ${i++} 
.fileTypeFromStream() method - same fileType`, testFileFromStream, type, name); test(`${name}.${type} ${i++} .fileTypeStream() - identical streams`, testStream, type, name); } @@ -684,41 +695,45 @@ const tokenizerPositionChanger = tokenizer => { tokenizer.readBuffer(buffer, {length: 1, mayBeLess: true}); }; -test('fileTypeFromBlob should detect custom file type "unicorn" using custom detectors', async t => { - // Set up the "unicorn" file content - const header = 'UNICORN FILE\n'; - const blob = new Blob([header]); +if (nodeMajorVersion >= nodeVersionSupportingByeBlobStream) { + // Blob requires to stream to BYOB ReadableStream, requiring Node.js ≥ 20 - const customDetectors = [unicornDetector]; - const parser = new FileTypeParser({customDetectors}); + test('fileTypeFromBlob should detect custom file type "unicorn" using custom detectors', async t => { + // Set up the "unicorn" file content + const header = 'UNICORN FILE\n'; + const blob = new Blob([header]); - const result = await parser.fromBlob(blob); - t.deepEqual(result, {ext: 'unicorn', mime: 'application/unicorn'}); -}); + const customDetectors = [unicornDetector]; + const parser = new FileTypeParser({customDetectors}); -test('fileTypeFromBlob should keep detecting default file types when no custom detector matches', async t => { - const file = path.join(__dirname, 'fixture', 'fixture.png'); - const chunk = fs.readFileSync(file); - const blob = new Blob([chunk]); + const result = await parser.fromBlob(blob); + t.deepEqual(result, {ext: 'unicorn', mime: 'application/unicorn'}); + }); - const customDetectors = [unicornDetector]; - const parser = new FileTypeParser({customDetectors}); + test('fileTypeFromBlob should keep detecting default file types when no custom detector matches', async t => { + const file = path.join(__dirname, 'fixture', 'fixture.png'); + const chunk = fs.readFileSync(file); + const blob = new Blob([chunk]); - const result = await parser.fromBlob(blob); - t.deepEqual(result, {ext: 'png', 
mime: 'image/png'}); -}); + const customDetectors = [unicornDetector]; + const parser = new FileTypeParser({customDetectors}); -test('fileTypeFromBlob should allow overriding default file type detectors', async t => { - const file = path.join(__dirname, 'fixture', 'fixture.png'); - const chunk = fs.readFileSync(file); - const blob = new Blob([chunk]); + const result = await parser.fromBlob(blob); + t.deepEqual(result, {ext: 'png', mime: 'image/png'}); + }); - const customDetectors = [mockPngDetector]; - const parser = new FileTypeParser({customDetectors}); + test('fileTypeFromBlob should allow overriding default file type detectors', async t => { + const file = path.join(__dirname, 'fixture', 'fixture.png'); + const chunk = fs.readFileSync(file); + const blob = new Blob([chunk]); - const result = await parser.fromBlob(blob); - t.deepEqual(result, {ext: 'mockPng', mime: 'image/mockPng'}); -}); + const customDetectors = [mockPngDetector]; + const parser = new FileTypeParser({customDetectors}); + + const result = await parser.fromBlob(blob); + t.deepEqual(result, {ext: 'mockPng', mime: 'image/mockPng'}); + }); +} test('fileTypeFromBuffer should detect custom file type "unicorn" using custom detectors', async t => { const header = 'UNICORN FILE\n';