diff --git a/src/transformers/models/dac/modeling_dac.py b/src/transformers/models/dac/modeling_dac.py
index c52630c13a18..2aa873070b4a 100644
--- a/src/transformers/models/dac/modeling_dac.py
+++ b/src/transformers/models/dac/modeling_dac.py
@@ -16,7 +16,7 @@
 
 import math
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, Union
 
 import numpy as np
 import torch
@@ -583,7 +583,7 @@ def encode(
         input_values: torch.Tensor,
         n_quantizers: Optional[int] = None,
         return_dict: Optional[bool] = None,
-    ):
+    ) -> Union[tuple, DacEncoderOutput]:
         r"""
         input_values (`torch.Tensor of shape `(batch_size, 1, time_steps)`):
             Input audio data to encode,
@@ -610,7 +610,7 @@ def decode(
         quantized_representation: Optional[torch.Tensor] = None,
         audio_codes: Optional[torch.Tensor] = None,
         return_dict: Optional[bool] = None,
-    ):
+    ) -> Union[tuple, DacDecoderOutput]:
         r"""
         quantized_representation (torch.Tensor of shape `(batch_size, dimension, time_steps)`, *optional*):
             Quantized continuous representation of input.
@@ -643,7 +643,7 @@ def forward(
         input_values: torch.Tensor,
         n_quantizers: Optional[int] = None,
         return_dict: Optional[bool] = None,
-    ):
+    ) -> Union[tuple, DacOutput]:
         r"""
         input_values (`torch.Tensor` of shape `(batch_size, 1, time_steps)`):
             Audio data to encode.
diff --git a/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py b/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py
index f568477303c0..56d2f8ebbf93 100644
--- a/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py
+++ b/src/transformers/models/deepseek_vl/modeling_deepseek_vl.py
@@ -196,7 +196,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs,
-    ):
+    ) -> DeepseekVLBaseModelOutputWithPast:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError(
                 "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
@@ -268,7 +268,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs: Unpack[TransformersKwargs],
-    ):
+    ) -> DeepseekVLCausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
diff --git a/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py
index c0204fe94382..e53f45c9bee5 100644
--- a/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py
+++ b/src/transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py
@@ -314,7 +314,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs,
-    ):
+    ) -> DeepseekVLHybridBaseModelOutputWithPast:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError(
                 "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
@@ -424,7 +424,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs: Unpack[TransformersKwargs],
-    ):
+    ) -> DeepseekVLHybridCausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
diff --git a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
index 622458328977..0292b9ef4c5f 100644
--- a/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
+++ b/src/transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py
@@ -297,7 +297,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs,
-    ):
+    ) -> DeepseekVLHybridBaseModelOutputWithPast:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError(
                 "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
@@ -361,7 +361,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs: Unpack[TransformersKwargs],
-    ):
+    ) -> DeepseekVLHybridCausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
diff --git a/src/transformers/models/flava/modeling_flava.py b/src/transformers/models/flava/modeling_flava.py
index 9521329461ed..fd0c99dc35b5 100644
--- a/src/transformers/models/flava/modeling_flava.py
+++ b/src/transformers/models/flava/modeling_flava.py
@@ -1107,7 +1107,7 @@ def forward(
         output_hidden_states: bool = True,
         return_dict: Optional[bool] = None,
         **kwargs,
-    ) -> Union[tuple, FlavaOutput]:
+    ) -> Union[tuple, FlavaModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, image_num_patches + text_seq_len)`):
             Indices of input sequence tokens in the vocabulary. Indices can be obtained using [`AutoTokenizer`]. See
diff --git a/src/transformers/models/grounding_dino/modeling_grounding_dino.py b/src/transformers/models/grounding_dino/modeling_grounding_dino.py
index 0d1619e7d5cc..5ea505679609 100644
--- a/src/transformers/models/grounding_dino/modeling_grounding_dino.py
+++ b/src/transformers/models/grounding_dino/modeling_grounding_dino.py
@@ -1511,7 +1511,7 @@ def forward(
         output_hidden_states=None,
         return_dict=None,
         **kwargs,
-    ):
+    ) -> Union[tuple, GroundingDinoEncoderOutput]:
         r"""
         Args:
             vision_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
@@ -1666,7 +1666,7 @@ def forward(
         output_hidden_states=None,
         return_dict=None,
         **kwargs,
-    ):
+    ) -> Union[tuple, GroundingDinoDecoderOutput]:
         r"""
         Args:
             inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_queries, hidden_size)`):
@@ -2059,7 +2059,7 @@ def forward(
         output_hidden_states=None,
         return_dict=None,
         **kwargs,
-    ):
+    ) -> Union[tuple, GroundingDinoModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, text_sequence_length)`):
             Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
diff --git a/src/transformers/models/janus/modeling_janus.py b/src/transformers/models/janus/modeling_janus.py
index 26a9031054af..aa40cff5dfc7 100644
--- a/src/transformers/models/janus/modeling_janus.py
+++ b/src/transformers/models/janus/modeling_janus.py
@@ -1126,7 +1126,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs,
-    ):
+    ) -> JanusBaseModelOutputWithPast:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError(
                 "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
@@ -1203,7 +1203,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs: Unpack[TransformersKwargs],
-    ):
+    ) -> JanusCausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
diff --git a/src/transformers/models/janus/modular_janus.py b/src/transformers/models/janus/modular_janus.py
index 4368c3ee0834..aaf5e0d7a11a 100644
--- a/src/transformers/models/janus/modular_janus.py
+++ b/src/transformers/models/janus/modular_janus.py
@@ -942,7 +942,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs,
-    ):
+    ) -> JanusBaseModelOutputWithPast:
         if (input_ids is None) ^ (inputs_embeds is not None):
             raise ValueError(
                 "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
@@ -1019,7 +1019,7 @@ def forward(
         use_cache: Optional[bool] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         **kwargs: Unpack[TransformersKwargs],
-    ):
+    ) -> JanusCausalLMOutputWithPast:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
diff --git a/src/transformers/models/maskformer/modeling_maskformer_swin.py b/src/transformers/models/maskformer/modeling_maskformer_swin.py
index a9ab377a00d8..b20fa92ece69 100644
--- a/src/transformers/models/maskformer/modeling_maskformer_swin.py
+++ b/src/transformers/models/maskformer/modeling_maskformer_swin.py
@@ -19,7 +19,7 @@
 import collections.abc
 import math
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, Union
 
 import torch
 from torch import Tensor, nn
@@ -656,7 +656,7 @@ def forward(
         output_attentions=False,
         output_hidden_states=False,
         return_dict=True,
-    ):
+    ) -> Union[tuple, MaskFormerSwinBaseModelOutput]:
         all_hidden_states = () if output_hidden_states else None
         all_input_dimensions = ()
         all_self_attentions = () if output_attentions else None
@@ -739,7 +739,7 @@ def forward(
         interpolate_pos_encoding=False,
         return_dict=None,
         **kwargs,
-    ):
+    ) -> Union[tuple, MaskFormerSwinModelOutputWithPooling]:
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
diff --git a/src/transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py b/src/transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py
index 91f3b63949dc..a8fe024b074f 100644
--- a/src/transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py
+++ b/src/transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py
@@ -1181,7 +1181,7 @@ def forward(
         output_hidden_states=None,
         return_dict=None,
         **kwargs,
-    ):
+    ) -> Union[tuple, MMGroundingDinoEncoderOutput]:
         r"""
         Args:
             vision_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
@@ -1478,7 +1478,7 @@ def forward(
         output_hidden_states=None,
         return_dict=None,
         **kwargs,
-    ):
+    ) -> Union[tuple, MMGroundingDinoDecoderOutput]:
         r"""
         Args:
             inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_queries, hidden_size)`):
@@ -1954,7 +1954,7 @@ def forward(
         output_hidden_states=None,
         return_dict=None,
         **kwargs,
-    ):
+    ) -> Union[tuple, MMGroundingDinoModelOutput]:
         r"""
         input_ids (`torch.LongTensor` of shape `(batch_size, text_sequence_length)`):
             Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
diff --git a/src/transformers/models/tvp/modeling_tvp.py b/src/transformers/models/tvp/modeling_tvp.py
index b74e71123c9c..d7c9d2f073ee 100644
--- a/src/transformers/models/tvp/modeling_tvp.py
+++ b/src/transformers/models/tvp/modeling_tvp.py
@@ -16,7 +16,7 @@
 
 import math
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, Union
 
 import torch
 from torch import nn
@@ -462,7 +462,7 @@ def forward(
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
-    ):
+    ) -> Union[tuple, BaseModelOutput]:
         return_dict = return_dict if return_dict is not None else self.config.return_dict
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -722,7 +722,7 @@ def forward(
         return_dict: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         **kwargs,
-    ):
+    ) -> Union[tuple, BaseModelOutputWithPooling]:
         r"""
         Examples:
         ```python
@@ -824,7 +824,7 @@ def forward(
         return_dict: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         **kwargs,
-    ):
+    ) -> Union[tuple, TvpVideoGroundingOutput]:
         r"""
         labels (`torch.FloatTensor` of shape `(batch_size, 3)`, *optional*):
             The labels contains duration, start time, and end time of the video corresponding to the text.
diff --git a/src/transformers/models/udop/modeling_udop.py b/src/transformers/models/udop/modeling_udop.py
index 476b88c87608..676dc7e6ffca 100644
--- a/src/transformers/models/udop/modeling_udop.py
+++ b/src/transformers/models/udop/modeling_udop.py
@@ -1106,7 +1106,7 @@ def forward(
         return_dict=None,
         cache_position=None,
         **kwargs,
-    ):
+    ) -> Union[tuple, BaseModelOutputWithAttentionMask]:
         use_cache = use_cache if use_cache is not None else self.config.use_cache
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1476,7 +1476,7 @@ def forward(
         return_dict: Optional[bool] = None,
         cache_position: Optional[torch.LongTensor] = None,
         **kwargs,
-    ) -> tuple[Tensor, ...]:
+    ) -> Union[tuple, Seq2SeqModelOutput]:
         r"""
         bbox (`torch.LongTensor` of shape `({0}, 4)`, *optional*):
             Bounding boxes of each input sequence tokens. Selected in the range `[0,
@@ -1655,7 +1655,7 @@ def forward(
         labels: Optional[Tensor] = None,
         cache_position: Optional[torch.LongTensor] = None,
         **kwargs,
-    ) -> tuple[Tensor, ...]:
+    ) -> Union[tuple, Seq2SeqLMOutput]:
         r"""
         bbox (`torch.LongTensor` of shape `({0}, 4)`, *optional*):
             Bounding boxes of each input sequence tokens. Selected in the range `[0,
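
Every hunk above applies the same convention: a method that honors `return_dict`, returning a model-output dataclass by default and a plain tuple when `return_dict=False`, is annotated `Union[tuple, <SpecificOutput>]`, while methods annotated with the output class alone (the DeepseekVL and Janus forwards) presumably have any tuple conversion handled outside the method body, e.g. by a wrapper. The snippet below is a minimal, self-contained sketch of that contract, not transformers code; `ToyEncoder`, `ToyEncoderOutput`, and their fields are invented stand-ins for classes like `DacModel` and `DacEncoderOutput`:

# Standalone sketch of the return-type contract the annotations describe.
# All names here are illustrative, not transformers APIs.
from dataclasses import dataclass
from typing import Optional, Union

import torch


@dataclass
class ToyEncoderOutput:
    # Stand-in for e.g. DacEncoderOutput; fields are illustrative only.
    quantized_representation: torch.Tensor
    audio_codes: torch.Tensor

    def to_tuple(self) -> tuple:
        return (self.quantized_representation, self.audio_codes)


class ToyEncoder(torch.nn.Module):
    def forward(
        self,
        input_values: torch.Tensor,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, ToyEncoderOutput]:
        # Default to the structured output, mirroring return_dict=True.
        return_dict = True if return_dict is None else return_dict
        output = ToyEncoderOutput(
            quantized_representation=input_values,
            audio_codes=input_values.argmax(dim=-1),
        )
        # Both branches are covered by the Union[...] return annotation.
        return output if return_dict else output.to_tuple()


encoder = ToyEncoder()
x = torch.randn(1, 1, 16000)
assert isinstance(encoder(x), ToyEncoderOutput)
assert isinstance(encoder(x, return_dict=False), tuple)

Under this contract, `Union[tuple, ...Output]` is the narrowest annotation that is correct for both branches, which is why the diff also prefers it over the old `tuple[Tensor, ...]` annotations in udop.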