diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 490b3fac47e5..80d7a9abd192 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -16,8 +16,9 @@ """ TF 2.0 OpenAI GPT model.""" from dataclasses import dataclass -from typing import Optional, Tuple +from typing import Optional, Tuple, Union +import numpy as np import tensorflow as tf from ...activations_tf import get_tf_activation @@ -25,6 +26,7 @@ from ...modeling_tf_utils import ( TFCausalLanguageModelingLoss, TFConv1D, + TFModelInputType, TFPreTrainedModel, TFSequenceClassificationLoss, TFSequenceSummary, @@ -510,18 +512,18 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - head_mask=None, - inputs_embeds=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - training=False, + input_ids: Optional[TFModelInputType] = None, + attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + training: Optional[bool] = False, **kwargs, - ): + ) -> Union[Tuple, TFBaseModelOutput]: outputs = self.transformer( input_ids=input_ids, @@ -573,19 +575,19 @@ def set_output_embeddings(self, value): ) def call( self, - input_ids=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - head_mask=None, - inputs_embeds=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - labels=None, - training=False, + input_ids: Optional[TFModelInputType] = None, + attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + training: Optional[bool] = False, **kwargs, - ): + ) -> Union[Tuple, TFCausalLMOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): Labels for computing the cross entropy classification loss. Indices should be in `[0, ..., @@ -656,19 +658,19 @@ def __init__(self, config, *inputs, **kwargs): @replace_return_docstrings(output_type=TFOpenAIGPTDoubleHeadsModelOutput, config_class=_CONFIG_FOR_DOC) def call( self, - input_ids=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - head_mask=None, - inputs_embeds=None, - mc_token_ids=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - training=False, + input_ids: Optional[TFModelInputType] = None, + attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + mc_token_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + training: Optional[bool] = False, **kwargs, - ): + ) -> Union[Tuple, TFOpenAIGPTDoubleHeadsModelOutput]: r""" mc_token_ids (`tf.Tensor` or `Numpy array` of shape `(batch_size, num_choices)`, *optional*, default to index of the last token of the input): Index of the classification token in each input sequence. Selected in the range `[0, input_ids.size(-1) - @@ -800,19 +802,19 @@ def __init__(self, config, *inputs, **kwargs): ) def call( self, - input_ids=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - head_mask=None, - inputs_embeds=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - labels=None, - training=False, + input_ids: Optional[TFModelInputType] = None, + attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None, + head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None, + inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + labels: Optional[Union[np.ndarray, tf.Tensor]] = None, + training: Optional[bool] = False, **kwargs, - ): + ) -> Union[Tuple, TFSequenceClassifierOutput]: r""" labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*): Labels for computing the cross entropy classification loss. Indices should be in `[0, ...,