diff --git a/src/transformers/configuration_deberta.py b/src/transformers/configuration_deberta.py
index 6098d4375ea4..25181ea51538 100644
--- a/src/transformers/configuration_deberta.py
+++ b/src/transformers/configuration_deberta.py
@@ -28,8 +28,14 @@ class DebertaConfig(PretrainedConfig):
     r"""
-    :class:`~transformers.DebertaConfig` is the configuration class to store the configuration of a
-    :class:`~transformers.DebertaModel`.
+    This is the configuration class to store the configuration of a :class:`~transformers.DebertaModel` or a
+    :class:`~transformers.TFDebertaModel`. It is used to instantiate a DeBERTa model according to the specified
+    arguments, defining the model architecture. Instantiating a configuration with the defaults will yield a similar
+    configuration to that of the DeBERTa `microsoft/deberta-base <https://huggingface.co/microsoft/deberta-base>`__
+    architecture.
+
+    Configuration objects inherit from :class:`~transformers.PretrainedConfig` and can be used to control the model
+    outputs. Read the documentation from :class:`~transformers.PretrainedConfig` for more information.
 
     Arguments:
         vocab_size (:obj:`int`, `optional`, defaults to 30522):
diff --git a/src/transformers/modeling_deberta.py b/src/transformers/modeling_deberta.py
index 060e762f9f0c..a5558f31ea9f 100644
--- a/src/transformers/modeling_deberta.py
+++ b/src/transformers/modeling_deberta.py
@@ -797,13 +797,18 @@ def _init_weights(self, module):
             `What are input IDs? <../glossary.html#input-ids>`__
         attention_mask (:obj:`torch.FloatTensor` of shape :obj:`{0}`, `optional`):
-            Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: ``1`` for
-            tokens that are NOT MASKED, ``0`` for MASKED tokens.
+            Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
+
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
 
             `What are attention masks? <../glossary.html#attention-mask>`__
         token_type_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
             Segment token indices to indicate first and second portions of the inputs. Indices are selected in ``[0,
-            1]``: ``0`` corresponds to a `sentence A` token, ``1`` corresponds to a `sentence B` token
+            1]``:
+
+            - 0 corresponds to a `sentence A` token,
+            - 1 corresponds to a `sentence B` token.
 
             `What are token type IDs? <../glossary.html#token-type-ids>`_
         position_ids (:obj:`torch.LongTensor` of shape :obj:`{0}`, `optional`):
@@ -816,14 +821,13 @@ def _init_weights(self, module):
             This is useful if you want more control over how to convert `input_ids` indices into associated vectors
             than the model's internal embedding lookup matrix.
         output_attentions (:obj:`bool`, `optional`):
-            If set to ``True``, the attentions tensors of all attention layers are returned. See ``attentions`` under
-            returned tensors for more detail.
-        output_hidden_states (:obj:`bool`, `optional`):
-            If set to ``True``, the hidden states of all layers are returned. See ``hidden_states`` under returned
+            Whether or not to return the attentions tensors of all attention layers. See ``attentions`` under returned
             tensors for more detail.
+        output_hidden_states (:obj:`bool`, `optional`):
+            Whether or not to return the hidden states of all layers. See ``hidden_states`` under returned tensors for
+            more detail.
         return_dict (:obj:`bool`, `optional`):
-            If set to ``True``, the model will return a :class:`~transformers.file_utils.ModelOutput` instead of a
-            plain tuple.
+            Whether or not to return a :class:`~transformers.file_utils.ModelOutput` instead of a plain tuple.
 """
diff --git a/src/transformers/tokenization_deberta.py b/src/transformers/tokenization_deberta.py
index d27331552b87..e59b34ea2f90 100644
--- a/src/transformers/tokenization_deberta.py
+++ b/src/transformers/tokenization_deberta.py
@@ -581,7 +581,7 @@ def convert_tokens_to_string(self, tokens):
     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
         """
         Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
-        adding special tokens. A BERT sequence has the following format:
+        adding special tokens. A DeBERTa sequence has the following format:
 
         - single sequence: [CLS] X [SEP]
         - pair of sequences: [CLS] A [SEP] B [SEP]
@@ -608,14 +608,15 @@ def get_special_tokens_mask(self, token_ids_0, token_ids_1=None, already_has_special_tokens=False):
             special tokens using the tokenizer ``prepare_for_model`` or ``encode_plus`` methods.
 
         Args:
-            token_ids_0: list of ids (must not contain special tokens)
-            token_ids_1: Optional list of ids (must not contain special tokens), necessary when fetching sequence ids
-                for sequence pairs
-            already_has_special_tokens: (default False) Set to True if the token list is already formated with
-                special tokens for the model
+            token_ids_0 (:obj:`List[int]`):
+                List of IDs.
+            token_ids_1 (:obj:`List[int]`, `optional`):
+                Optional second list of IDs for sequence pairs.
+            already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
+                Whether or not the token list is already formatted with special tokens for the model.
 
         Returns:
-            A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+            :obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
         """
         if already_has_special_tokens:
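Example (not part of the patch): a minimal usage sketch of the tokenizer methods whose docstrings are updated above, assuming an install of ``transformers`` with DeBERTa support and the ``microsoft/deberta-base`` checkpoint referenced in the configuration docstring; exact token IDs depend on the vocabulary::

    from transformers import DebertaTokenizer

    tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")

    # Token IDs for two sequences, without special tokens.
    ids_a = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("Hello world"))
    ids_b = tokenizer.convert_tokens_to_ids(tokenizer.tokenize("How are you?"))

    # Pair input in the documented format: [CLS] A [SEP] B [SEP]
    pair_ids = tokenizer.build_inputs_with_special_tokens(ids_a, ids_b)

    # Mask aligned with pair_ids: 1 for the special tokens added above, 0 for sequence tokens.
    special_mask = tokenizer.get_special_tokens_mask(ids_a, ids_b)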