Skip to content

Commit

Permalink
Move Node detection Stream functionality from primary entry point (co…
Browse files Browse the repository at this point in the history
…re) to Node (only) entry point
  • Loading branch information
Borewit committed Jul 7, 2024
1 parent fc746a2 commit 1ab8b6d
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 91 deletions.
48 changes: 1 addition & 47 deletions core.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ Typings for primary entry point, Node.js specific typings can be found in index.
*/

import type {ReadableStream as WebReadableStream} from 'node:stream/web';
import type {Readable as NodeReadableStream} from 'node:stream';
import type {ITokenizer} from 'strtok3';

export type FileExtension =
Expand Down Expand Up @@ -323,10 +322,6 @@ export type FileTypeResult = {
readonly mime: MimeType;
};

/**
A Web API `ReadableStream` that additionally carries an optional, read-only `fileType` detection result.
*/
export type ReadableStreamWithFileType = WebReadableStream & {
readonly fileType?: FileTypeResult;
};

/**
Detect the file type of a `Uint8Array`, or `ArrayBuffer`.
Expand All @@ -347,7 +342,7 @@ The file type is detected by checking the [magic number](https://en.wikipedia.or
@param stream - A Web API Readable Stream representing file data. The Web Readable stream **must be a byte stream**. Node.js Readable streams are accepted by the Node.js specific entry point (see the typings in `index.d.ts`).
@returns The detected file type, or `undefined` when there is no match.
*/
export function fileTypeFromStream(stream: WebReadableStream): Promise<FileTypeResult | undefined>;
export function fileTypeFromStream(stream: WebReadableStream<Uint8Array>): Promise<FileTypeResult | undefined>;

/**
Detect the file type from an [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer) source.
Expand Down Expand Up @@ -396,37 +391,6 @@ export type StreamOptions = {
readonly sampleSize?: number;
};

/**
Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
This method can be handy to put in between a stream, but it comes with a price.
Internally `fileTypeStream()` builds up a buffer of `sampleSize` bytes, used as a sample, to determine the file type.
The sample size impacts the file detection resolution.
A smaller sample size will result in lower probability of the best file type detection.
**Note:** This method is only available when using Node.js.
**Note:** Requires Node.js 14 or later.
@param readableStream - A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) containing a file to examine.
@returns A `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
@example
```
import got from 'got';
import {fileTypeStream} from 'file-type';
const url = 'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg';
const stream1 = got.stream(url);
const stream2 = await fileTypeStream(stream1, {sampleSize: 1024});
if (stream2.fileType?.mime === 'image/jpeg') {
// stream2 can be used to stream the JPEG image (from the very beginning of the stream)
}
```
*/
export function fileTypeStream(readableStream: WebReadableStream<Uint8Array>, options?: StreamOptions): Promise<ReadableStreamWithFileType>;

/**
Detect the file type of a [`Blob`](https://nodejs.org/api/buffer.html#class-blob) or [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File).
Expand Down Expand Up @@ -513,11 +477,6 @@ export declare class FileTypeParser {
*/
fromBuffer(buffer: Uint8Array | ArrayBuffer): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeFromStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromStream(stream: NodeReadableStream): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeFromTokenizer}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
Expand All @@ -527,9 +486,4 @@ export declare class FileTypeParser {
Works the same way as {@link fileTypeFromBlob}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromBlob(blob: Blob): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
toDetectionStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<FileTypeResult | undefined>;
}
43 changes: 2 additions & 41 deletions core.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {
} from './util.js';
import {extensions, mimeTypes} from './supported.js';

const minimumBytes = 4100; // A fair amount of file-types are detectable within this range.
export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range.

export async function fileTypeFromStream(stream) {
return new FileTypeParser().fromStream(stream);
Expand Down Expand Up @@ -104,41 +104,6 @@ export class FileTypeParser {
}
}

/**
Pipe `readableStream` into a pass-through stream and attach the detected file type to it as `fileType`.

Detection runs once, on the first `readable` event, against a sample read from the input stream.

@param readableStream - Node.js readable stream to sample; it must support `on`, `once`, `read` and `pipe`.
@param options - Optional settings; `sampleSize` is the number of bytes to request for the sample (defaults to `minimumBytes`).
@returns A `Promise` resolving to the output stream. It rejects when the input stream emits `error` or when detection fails with anything other than `strtok3.EndOfStreamError`.
*/
async toDetectionStream(readableStream, options = {}) {
// Loaded lazily, so `node:stream` is only imported when this method is actually called.
const {default: stream} = await import('node:stream');
const {sampleSize = minimumBytes} = options;

return new Promise((resolve, reject) => {
// An `error` event on the input rejects; once the promise has settled, further calls are no-ops.
readableStream.on('error', reject);

readableStream.once('readable', () => {
(async () => {
try {
// Set up output stream
const pass = new stream.PassThrough();
// Prefer `stream.pipeline` when it exists, otherwise fall back to `pipe`. The pipeline callback is intentionally empty.
const outputStream = stream.pipeline ? stream.pipeline(readableStream, pass, () => {}) : readableStream.pipe(pass);

// Read the input stream and detect the filetype
// Request `sampleSize` bytes first; if that yields nothing, take whatever `read()` returns, else an empty sample.
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? new Uint8Array(0);
try {
pass.fileType = await this.fromBuffer(chunk);
} catch (error) {
// Running out of data during detection is treated as "no match", not as a failure.
if (error instanceof strtok3.EndOfStreamError) {
pass.fileType = undefined;
} else {
reject(error);
}
}

// Also reached after the `reject(error)` above; in that case this call has no effect because the promise is already settled.
resolve(outputStream);
} catch (error) {
reject(error);
}
})();
});
});
}

check(header, options) {
return _check(this.buffer, header, options);
}
Expand All @@ -148,7 +113,7 @@ export class FileTypeParser {
}

async parse(tokenizer) {
this.buffer = new Uint8Array(minimumBytes);
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);

// Keep reading until EOF if the file size is unknown.
if (tokenizer.fileInfo.size === undefined) {
Expand Down Expand Up @@ -1693,9 +1658,5 @@ export class FileTypeParser {
}
}

/**
Convenience wrapper: run `toDetectionStream()` on a fresh `FileTypeParser` (no custom options passed to the constructor).

@param readableStream - Readable stream to examine.
@param options - Optional settings forwarded unchanged to `toDetectionStream()`.
@returns Whatever `toDetectionStream()` resolves to.
*/
export async function fileTypeStream(readableStream, options = {}) {
	const parser = new FileTypeParser();
	return parser.toDetectionStream(readableStream, options);
}

export const supportedExtensions = new Set(extensions);
export const supportedMimeTypes = new Set(mimeTypes);
42 changes: 41 additions & 1 deletion index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,23 @@ Typings for Node.js specific entry point.

import type {Readable as NodeReadableStream} from 'node:stream';
import type {ReadableStream as WebReadableStream} from 'node:stream/web';
import type {FileTypeResult} from './core.js';
import type {FileTypeResult, StreamOptions} from './core.js';
import {FileTypeParser} from './core.js';

/**
A Node.js `stream.Readable` that additionally carries an optional, read-only `fileType` detection result.
This is the type the promise returned by `fileTypeStream()` resolves to.
*/
export type ReadableStreamWithFileType = NodeReadableStream & {
readonly fileType?: FileTypeResult;
};

/**
Node.js specific parser: extends {@link FileTypeParser} with stream methods that accept a Node.js `stream.Readable`.
*/
export declare class NodeFileTypeParser extends FileTypeParser {
/**
Detect the file type of a stream.
@param stream - Node.js `stream.Readable` or Web API `ReadableStream`.
@returns A `Promise` resolving to a `FileTypeResult`, or `undefined`.
*/
fromStream(stream: WebReadableStream<Uint8Array> | NodeReadableStream): Promise<FileTypeResult | undefined>;

/**
Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
@param readableStream - Node.js `stream.Readable` to examine.
@param options - Optional stream options, see `StreamOptions`.
@returns A `Promise` resolving to a readable stream with an added `fileType` property.
*/
toDetectionStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;
}

/**
Expand All @@ -26,4 +35,35 @@ export function fileTypeFromFile(path: string): Promise<FileTypeResult | undefin

export function fileTypeFromStream(stream: WebReadableStream<Uint8Array> | NodeReadableStream): Promise<FileTypeResult | undefined>;

/**
Returns a `Promise` which resolves to a readable stream with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.
The resolved stream is a pass-through stream that the readable stream argument is piped into, not the argument itself.
This method can be handy to put in between a stream, but it comes with a price.
Internally `fileTypeStream()` builds up a buffer of `sampleSize` bytes, used as a sample, to determine the file type.
The sample size impacts the file detection resolution.
A smaller sample size will result in lower probability of the best file type detection.
**Note:** This method is only available when using Node.js.
**Note:** Requires Node.js 14 or later.
@param readableStream - A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) containing a file to examine.
@param options - Optional settings; `sampleSize` sets how many bytes are sampled for detection.
@returns A `Promise` which resolves to a readable stream fed from the readable stream argument, with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`. `fileType` is `undefined` when the sample ends before detection completes.
@example
```
import got from 'got';
import {fileTypeStream} from 'file-type';
const url = 'https://upload.wikimedia.org/wikipedia/en/a/a9/Example.jpg';
const stream1 = got.stream(url);
const stream2 = await fileTypeStream(stream1, {sampleSize: 1024});
if (stream2.fileType?.mime === 'image/jpeg') {
// stream2 can be used to stream the JPEG image (from the very beginning of the stream)
}
```
*/
export function fileTypeStream(readableStream: NodeReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;

export * from './core.js';
43 changes: 41 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Node.js specific entry point.

import {ReadableStream as WebReadableStream} from 'node:stream/web';
import * as strtok3 from 'strtok3';
import {FileTypeParser} from './core.js';
import {FileTypeParser, reasonableDetectionSizeInBytes} from './core.js';

export class NodeFileTypeParser extends FileTypeParser {
async fromStream(stream) {
Expand All @@ -15,6 +15,41 @@ export class NodeFileTypeParser extends FileTypeParser {
await tokenizer.close();
}
}

/**
Pipe `readableStream` into a pass-through stream and attach the detected file type to it as `fileType`.

Detection runs once, on the first `readable` event, against a sample read from the input stream.

@param readableStream - Node.js readable stream to sample; it must support `on`, `once`, `read` and `pipe`.
@param options - Optional settings; `sampleSize` is the number of bytes to request for the sample (defaults to `reasonableDetectionSizeInBytes`).
@returns A `Promise` resolving to the output stream. It rejects when the input stream emits `error` or when detection fails with anything other than `strtok3.EndOfStreamError`.
*/
async toDetectionStream(readableStream, options = {}) {
// Loaded lazily, so `node:stream` is only imported when this method is actually called.
const {default: stream} = await import('node:stream');
const {sampleSize = reasonableDetectionSizeInBytes} = options;

return new Promise((resolve, reject) => {
// An `error` event on the input rejects; once the promise has settled, further calls are no-ops.
readableStream.on('error', reject);

readableStream.once('readable', () => {
(async () => {
try {
// Set up output stream
const pass = new stream.PassThrough();
// Prefer `stream.pipeline` when it exists, otherwise fall back to `pipe`. The pipeline callback is intentionally empty.
const outputStream = stream.pipeline ? stream.pipeline(readableStream, pass, () => {}) : readableStream.pipe(pass);

// Read the input stream and detect the filetype
// Request `sampleSize` bytes first; if that yields nothing, take whatever `read()` returns, else an empty sample.
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? new Uint8Array(0);
try {
pass.fileType = await this.fromBuffer(chunk);
} catch (error) {
// Running out of data during detection is treated as "no match", not as a failure.
if (error instanceof strtok3.EndOfStreamError) {
pass.fileType = undefined;
} else {
reject(error);
}
}

// Also reached after the `reject(error)` above; in that case this call has no effect because the promise is already settled.
resolve(outputStream);
} catch (error) {
reject(error);
}
})();
});
});
}
}

export async function fileTypeFromFile(path, fileTypeOptions) {
Expand All @@ -31,4 +66,8 @@ export async function fileTypeFromStream(stream, fileTypeOptions) {
return (new NodeFileTypeParser(fileTypeOptions)).fromStream(stream);
}

export {fileTypeFromBuffer, fileTypeFromBlob, fileTypeStream, FileTypeParser, supportedMimeTypes, supportedExtensions} from './core.js';
/**
Convenience wrapper: run `toDetectionStream()` on a fresh `NodeFileTypeParser` (no custom options passed to the constructor).

@param readableStream - Node.js readable stream to examine.
@param options - Optional settings forwarded unchanged to `toDetectionStream()`.
@returns Whatever `toDetectionStream()` resolves to.
*/
export async function fileTypeStream(readableStream, options = {}) {
	const parser = new NodeFileTypeParser();
	return parser.toDetectionStream(readableStream, options);
}

export {fileTypeFromBuffer, fileTypeFromBlob, FileTypeParser, supportedMimeTypes, supportedExtensions} from './core.js';

0 comments on commit 1ab8b6d

Please sign in to comment.