Skip to content

Commit

Permalink
Check TypeScript errors during build (#1081)
Browse files Browse the repository at this point in the history
* migrate jsconfig.json to tsconfig.json

* add @ts-expect-error

* add type assertion

* fix types for navigator.gpu

* fix types for Processor

* add @ts-expect-error

* ignore error in batch_decode

* Add types for MultiModalityCausalLM
  • Loading branch information
ocavue authored Dec 28, 2024
1 parent 8e075f4 commit c845bb5
Show file tree
Hide file tree
Showing 16 changed files with 84 additions and 5 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"scripts": {
"format": "prettier --write .",
"format:check": "prettier --check .",
"typegen": "tsc ./src/transformers.js --allowJs --declaration --emitDeclarationOnly --declarationMap --outDir types",
"typegen": "tsc --build",
"dev": "webpack serve --no-client-overlay",
"build": "webpack && npm run typegen",
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose",
Expand Down
11 changes: 11 additions & 0 deletions src/base/image_processors_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -604,14 +604,20 @@ export class ImageProcessor extends Callable {
this.do_thumbnail = config.do_thumbnail;
this.size = config.size ?? config.image_size;
this.do_resize = config.do_resize ?? (this.size !== undefined);
// @ts-expect-error TS2339
this.size_divisibility = config.size_divisibility ?? config.size_divisor;

this.do_center_crop = config.do_center_crop;
// @ts-expect-error TS2339
this.crop_size = config.crop_size;
// @ts-expect-error TS2339
this.do_convert_rgb = config.do_convert_rgb ?? true;
// @ts-expect-error TS2339
this.do_crop_margin = config.do_crop_margin;

// @ts-expect-error TS2339
this.pad_size = config.pad_size;
// @ts-expect-error TS2339
this.do_pad = config.do_pad;

if (this.do_pad && !this.pad_size && this.size && this.size.width !== undefined && this.size.height !== undefined) {
Expand Down Expand Up @@ -820,6 +826,7 @@ export class ImageProcessor extends Callable {
// Support both formats for backwards compatibility
else if (Number.isInteger(size)) {
shortest_edge = size;
// @ts-expect-error TS2339
longest_edge = this.config.max_size ?? shortest_edge;

} else if (size !== undefined) {
Expand Down Expand Up @@ -888,6 +895,7 @@ export class ImageProcessor extends Callable {
} else if (size.min_pixels !== undefined && size.max_pixels !== undefined) {
// Custom resize logic for Qwen2-VL models
const { min_pixels, max_pixels } = size;
// @ts-expect-error TS2339
const factor = this.config.patch_size * this.config.merge_size;
return smart_resize(srcHeight, srcWidth, factor, min_pixels, max_pixels);
} else {
Expand All @@ -903,6 +911,7 @@ export class ImageProcessor extends Callable {
async resize(image) {
// Derive the target dimensions from the input image and the configured `this.size`.
const [newWidth, newHeight] = this.get_resize_output_image_size(image, this.size);
// Delegate the actual resampling to the image object's own `resize` method.
return await image.resize(newWidth, newHeight, {
// The type checker reports TS2322 (type not assignable) for `this.resample` here;
// the directive below must stay directly above the offending line to suppress it.
// @ts-expect-error TS2322
resample: this.resample,
});
}
Expand Down Expand Up @@ -953,6 +962,7 @@ export class ImageProcessor extends Callable {

// Resize the image using thumbnail method.
if (this.do_thumbnail) {
// @ts-expect-error TS2345
image = await this.thumbnail(image, this.size, this.resample);
}

Expand All @@ -977,6 +987,7 @@ export class ImageProcessor extends Callable {
// NOTE: All pixel-level manipulation (i.e., modifying `pixelData`)
// occurs with data in the hwc format (height, width, channels),
// to emulate the behavior of the original Python code (w/ numpy).
/** @type {Float32Array} */
let pixelData = Float32Array.from(image.data);
let imgDims = [image.height, image.width, image.channels];

Expand Down
12 changes: 11 additions & 1 deletion src/base/processing_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import { getModelJSON } from '../utils/hub.js';
/**
* @typedef {Object} ProcessorProperties Additional processor-specific properties.
* @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
* @typedef {import('../tokenizers.js').PreTrainedTokenizer} PreTrainedTokenizer
*/


Expand Down Expand Up @@ -61,7 +62,7 @@ export class Processor extends Callable {
}

/**
* @returns {import('../tokenizers.js').PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
* @returns {PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
*/
get tokenizer() {
// Plain property read on `this.components`; evaluates to `undefined` when no `tokenizer` entry is present.
return this.components.tokenizer;
}
return this.components.feature_extractor;
}

/**
* @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[0]} messages
* @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[1]} options
* @returns {ReturnType<PreTrainedTokenizer['apply_chat_template']>}
*/
apply_chat_template(messages, options = {}) {
if (!this.tokenizer) {
throw new Error('Unable to apply chat template without a tokenizer.');
Expand All @@ -84,6 +90,10 @@ export class Processor extends Callable {
});
}

/**
* @param {Parameters<PreTrainedTokenizer['batch_decode']>} args
* @returns {ReturnType<PreTrainedTokenizer['batch_decode']>}
*/
batch_decode(...args) {
if (!this.tokenizer) {
throw new Error('Unable to decode without a tokenizer.');
Expand Down
5 changes: 5 additions & 0 deletions src/configs.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,19 @@ function getNormalizedConfig(config) {
case 'florence2':
case 'llava_onevision':
case 'idefics3':
// @ts-expect-error TS2339
init_normalized_config = getNormalizedConfig(config.text_config);
break;
case 'moondream1':
// @ts-expect-error TS2339
init_normalized_config = getNormalizedConfig(config.phi_config);
break;
case 'musicgen':
// @ts-expect-error TS2339
init_normalized_config = getNormalizedConfig(config.decoder);
break;
case 'multi_modality':
// @ts-expect-error TS2339
init_normalized_config = getNormalizedConfig(config.language_config);
break;

Expand Down Expand Up @@ -199,6 +203,7 @@ function getNormalizedConfig(config) {
break;

case 'vision-encoder-decoder':
// @ts-expect-error TS2339
const decoderConfig = getNormalizedConfig(config.decoder);

const add_encoder_pkv = 'num_decoder_layers' in decoderConfig;
Expand Down
19 changes: 19 additions & 0 deletions src/models.js
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,11 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
} else if (session_options.externalData !== undefined) {
externalDataPromises = session_options.externalData.map(async (ext) => {
// if the external data is a string, fetch the file and replace the string with its content
// @ts-expect-error TS2339
if (typeof ext.data === "string") {
// @ts-expect-error TS2339
const ext_buffer = await getModelFile(pretrained_model_name_or_path, ext.data, true, options);
// @ts-expect-error TS2698
return { ...ext, data: ext_buffer };
}
return ext;
Expand Down Expand Up @@ -1519,6 +1522,7 @@ export class PreTrainedModel extends Callable {
if (this.config.model_type === 'musicgen') {
// Custom logic (TODO: move to Musicgen class)
decoder_input_ids = Array.from({
// @ts-expect-error TS2339
length: batch_size * this.config.decoder.num_codebooks
}, () => [decoder_start_token_id]);

Expand Down Expand Up @@ -1848,11 +1852,13 @@ export class PreTrainedModel extends Callable {
async encode_image({ pixel_values }) {
// image_inputs === { pixel_values }
const features = (await sessionRun(this.sessions['vision_encoder'], { pixel_values })).image_features;
// @ts-expect-error TS2339
if (!this.config.num_image_tokens) {
console.warn(
'The number of image tokens was not set in the model configuration. ' +
`Setting it to the number of features detected by the vision encoder (${features.dims[1]}).`
)
// @ts-expect-error TS2339
this.config.num_image_tokens = features.dims[1];
}
return features;
Expand Down Expand Up @@ -3280,6 +3286,7 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {

if (generation_config.return_token_timestamps) {
outputs["token_timestamps"] = this._extract_token_timestamps(
// @ts-expect-error TS2345
outputs,
generation_config.alignment_heads,
generation_config.num_frames,
Expand Down Expand Up @@ -3315,6 +3322,7 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
);
}

// @ts-expect-error TS2339
let median_filter_width = this.config.median_filter_width;
if (median_filter_width === undefined) {
console.warn("Model config has no `median_filter_width`, using default value of 7.")
Expand All @@ -3325,6 +3333,7 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
const batch = generate_outputs.cross_attentions;
// Create a list with `decoder_layers` elements, each a tensor of shape
// (batch size, attention_heads, output length, input length).
// @ts-expect-error TS2339
const cross_attentions = Array.from({ length: this.config.decoder_layers },
// Concatenate the cross attentions for each layer across sequence length dimension.
(_, i) => cat(batch.map(x => x[i]), 2)
Expand Down Expand Up @@ -3468,6 +3477,7 @@ export class LlavaForConditionalGeneration extends LlavaPreTrainedModel {
attention_mask,
}) {

// @ts-expect-error TS2339
const image_token_index = this.config.image_token_index;

const idsList = input_ids.tolist();
Expand Down Expand Up @@ -6201,10 +6211,12 @@ export class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {

const { encoder_outputs, encoder_attention_mask } = await encoderForward(this, model_inputs);

// @ts-expect-error TS2339
const r = encoder_outputs.dims[1] / this.config.reduction_factor;
const maxlen = Math.floor(r * maxlenratio);
const minlen = Math.floor(r * minlenratio);

// @ts-expect-error TS2339
const num_mel_bins = this.config.num_mel_bins;

let spectrogramParts = [];
Expand Down Expand Up @@ -6569,11 +6581,13 @@ export class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE:
*/
_apply_and_filter_by_delay_pattern_mask(outputs) {
const [bs_x_codebooks, seqLength] = outputs.dims;
// @ts-expect-error TS2339
const num_codebooks = this.config.decoder.num_codebooks;
const upperBound = (seqLength - num_codebooks);

let newDataSize = 0;
for (let i = 0; i < outputs.size; ++i) {
// @ts-expect-error TS2339
if (outputs.data[i] === this.config.decoder.pad_token_id) {
continue;
}
Expand Down Expand Up @@ -6603,7 +6617,9 @@ export class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE:
let clonedInputIds = structuredClone(input_ids);
for (let i = 0; i < clonedInputIds.length; ++i) {
for (let j = 0; j < clonedInputIds[i].length; ++j) {
// @ts-expect-error TS2339
if ((i % this.config.decoder.num_codebooks) >= j) {
// @ts-expect-error TS2339
clonedInputIds[i][j] = BigInt(this.config.decoder.pad_token_id);
}
}
Expand Down Expand Up @@ -6760,6 +6776,9 @@ export class MultiModalityCausalLM extends MultiModalityPreTrainedModel {
'past_key_values',
];

/**
* @param {ConstructorParameters<typeof MultiModalityPreTrainedModel>} args
*/
constructor(...args) {
super(...args);

Expand Down
1 change: 1 addition & 0 deletions src/models/convnext/image_processing_convnext.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export class ConvNextImageProcessor extends ImageProcessor {
/**
* Percentage of the image to crop. Only has an effect if this.size < 384.
*/
// @ts-expect-error TS2339
this.crop_pct = this.config.crop_pct ?? (224 / 256);
}

Expand Down
1 change: 1 addition & 0 deletions src/models/efficientnet/image_processing_efficientnet.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
export class EfficientNetImageProcessor extends ImageProcessor {
constructor(config) {
super(config);
// @ts-expect-error TS2339
this.include_top = this.config.include_top ?? true;
if (this.include_top) {
this.image_std = this.image_std.map(x => x * x);
Expand Down
3 changes: 3 additions & 0 deletions src/models/florence2/processing_florence2.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ export class Florence2Processor extends Processor {
super(config, components);

const {
// @ts-expect-error TS2339
tasks_answer_post_processing_type,
// @ts-expect-error TS2339
task_prompts_without_inputs,
// @ts-expect-error TS2339
task_prompts_with_input,
} = this.image_processor.config;

Expand Down
1 change: 1 addition & 0 deletions src/models/janus/image_processing_janus.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export class VLMImageProcessor extends ImageProcessor {
},
...config,
});
// @ts-expect-error TS2339
this.constant_values = this.config.background_color.map(x => x * this.rescale_factor)
}

Expand Down
2 changes: 2 additions & 0 deletions src/models/mgp_str/processing_mgp_str.js
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ export class MgpstrProcessor extends Processor {
* - bpe_preds: The list of BPE decoded sentences.
* - wp_preds: The list of wp decoded sentences.
*/
// @ts-expect-error The type of this method is not compatible with the one
// in the base class. It might be a good idea to fix this.
batch_decode([char_logits, bpe_logits, wp_logits]) {
const [char_preds, char_scores] = this._decode_helper(char_logits, 'char');
const [bpe_preds, bpe_scores] = this._decode_helper(bpe_logits, 'bpe');
Expand Down
1 change: 1 addition & 0 deletions src/models/paligemma/processing_paligemma.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ export class PaliGemmaProcessor extends Processor {
}

const bos_token = this.tokenizer.bos_token;
// @ts-expect-error TS2339
const image_seq_length = this.image_processor.config.image_seq_length;
let input_strings;
if (text.some((t) => t.includes(IMAGE_TOKEN))) {
Expand Down
1 change: 1 addition & 0 deletions src/models/qwen2_vl/processing_qwen2_vl.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export class Qwen2VLProcessor extends Processor {
}

if (image_grid_thw) {
// @ts-expect-error TS2551
let merge_length = this.image_processor.config.merge_size ** 2;
let index = 0;

Expand Down
Loading

0 comments on commit c845bb5

Please sign in to comment.