7 changes: 0 additions & 7 deletions src/transformers/models/hubert/modeling_hubert.py
@@ -41,14 +41,10 @@

logger = logging.get_logger(__name__)

_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"


_HIDDEN_STATES_START_POSITION = 1

# General docstring
_CONFIG_FOR_DOC = "HubertConfig"
_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"

# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/hubert-large-ls960-ft"
@@ -59,7 +55,6 @@
_CTC_EXPECTED_LOSS = 22.68

# Audio class docstring
_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
_SEQ_CLASS_CHECKPOINT = "superb/hubert-base-superb-ks"
_SEQ_CLASS_EXPECTED_OUTPUT = "'_unknown_'"
_SEQ_CLASS_EXPECTED_LOSS = 8.53
@@ -1145,7 +1140,6 @@ def freeze_feature_encoder(self):

@add_start_docstrings_to_model_forward(HUBERT_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_PROCESSOR_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=CausalLMOutput,
config_class=_CONFIG_FOR_DOC,
@@ -1280,7 +1274,6 @@ def freeze_base_model(self):

@add_start_docstrings_to_model_forward(HUBERT_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_FEAT_EXTRACTOR_FOR_DOC,
checkpoint=_SEQ_CLASS_CHECKPOINT,
output_type=SequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
4 changes: 0 additions & 4 deletions src/transformers/models/longformer/modeling_longformer.py
@@ -41,7 +41,6 @@

_CHECKPOINT_FOR_DOC = "allenai/longformer-base-4096"
_CONFIG_FOR_DOC = "LongformerConfig"
_TOKENIZER_FOR_DOC = "LongformerTokenizer"

LONGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
"allenai/longformer-base-4096",
@@ -1903,7 +1902,6 @@ def __init__(self, config):

@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint="jpwahle/longformer-base-plagiarism-detection",
output_type=LongformerSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
@@ -2172,7 +2170,6 @@ def __init__(self, config):

@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint="brad1141/Longformer-finetuned-norm",
output_type=LongformerTokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
@@ -2260,7 +2257,6 @@ def __init__(self, config):
LONGFORMER_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=LongformerMultipleChoiceModelOutput,
config_class=_CONFIG_FOR_DOC,
12 changes: 2 additions & 10 deletions src/transformers/models/longformer/modeling_tf_longformer.py
@@ -2382,11 +2382,9 @@ def __init__(self, config, *inputs, **kwargs):
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint="hf-internal-testing/tiny-random-longformer",
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=TFLongformerSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="'LABEL_1'",
expected_loss=0.69,
)
def call(
self,
@@ -2637,15 +2635,9 @@ def __init__(self, config, *inputs, **kwargs):
@add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint="hf-internal-testing/tiny-random-longformer",
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=TFLongformerTokenClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=(
"['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1',"
" 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1',"
" 'LABEL_1', 'LABEL_1']"
),
expected_loss=0.59,
)
def call(
self,
23 changes: 2 additions & 21 deletions src/transformers/models/mbart/modeling_mbart.py
@@ -49,22 +49,10 @@

_CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25"
_CONFIG_FOR_DOC = "MBartConfig"
_TOKENIZER_FOR_DOC = "MBartTokenizer"

# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 8, 1024]

# SequenceClassification docstring
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-mbart"
_SEQ_CLASS_EXPECTED_LOSS = 0.69
_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"

# QuestionAnswering docstring
_CHECKPOINT_FOR_QA = "hf-internal-testing/tiny-random-mbart"
_QA_EXPECTED_LOSS = 3.55
_QA_EXPECTED_OUTPUT = "'? Jim Henson was a'"


MBART_PRETRAINED_MODEL_ARCHIVE_LIST = [
"facebook/mbart-large-cc25",
# See all MBART models at https://huggingface.co/models?filter=mbart
@@ -1187,7 +1175,6 @@ def get_decoder(self):

@add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqModelOutput,
config_class=_CONFIG_FOR_DOC,
@@ -1467,12 +1454,9 @@ def __init__(self, config: MBartConfig, **kwargs):

@add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
)
# Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
def forward(
@@ -1596,12 +1580,9 @@ def __init__(self, config):

@add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_QA,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqQuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_loss=_QA_EXPECTED_LOSS,
expected_output=_QA_EXPECTED_OUTPUT,
)
# Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward
def forward(
16 changes: 1 addition & 15 deletions src/transformers/models/plbart/modeling_plbart.py
@@ -48,16 +48,6 @@

_CHECKPOINT_FOR_DOC = "uclanlp/plbart-base"
_CONFIG_FOR_DOC = "PLBartConfig"
_TOKENIZER_FOR_DOC = "PLBartTokenizer"

# Base model docstring
_EXPECTED_OUTPUT_SHAPE = [1, 8, 768]

# SequenceClassification docstring
_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-plbart"
_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
_SEQ_CLASS_EXPECTED_LOSS = 0.69


PLBART_PRETRAINED_MODEL_ARCHIVE_LIST = [
"uclanlp/plbart-base",
@@ -1161,7 +1151,6 @@ def get_decoder(self):

@add_start_docstrings_to_model_forward(PLBART_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqModelOutput,
config_class=_CONFIG_FOR_DOC,
@@ -1438,12 +1427,9 @@ def __init__(self, config: PLBartConfig, **kwargs):

@add_start_docstrings_to_model_forward(PLBART_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=Seq2SeqSequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,
expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
)
# Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
def forward(
70 changes: 16 additions & 54 deletions src/transformers/models/reformer/modeling_reformer.py
@@ -49,7 +49,6 @@

_CHECKPOINT_FOR_DOC = "google/reformer-crime-and-punishment"
_CONFIG_FOR_DOC = "ReformerConfig"
_TOKENIZER_FOR_DOC = "ReformerTokenizer"

REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
"google/reformer-crime-and-punishment",
@@ -2009,7 +2008,6 @@ class PreTrainedModel

@add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=ReformerModelOutput,
config_class=_CONFIG_FOR_DOC,
@@ -2220,7 +2218,6 @@ def set_output_embeddings(self, new_embeddings):

@add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=CausalLMOutput,
config_class=_CONFIG_FOR_DOC,
@@ -2360,13 +2357,20 @@ def forward(

Returns:

<Tip warning={true}>
Collaborator: I think this could use _CHECKPOINT_FOR_DOC but remove the parts that check the expected output values, and then we don't need this warning? At least, this is what you have done for some models that used tiny-random-xxx checkpoints.

Collaborator: (Hmm, it raises a question: when we use real checkpoints that don't contain head weights, we so far don't include the output checks, but we also don't add any warning. The warning introduced in your last PR is only used for two models so far, where no small enough real checkpoints exist.)

Would like to hear from you on this - as I remember, one major reason was to avoid user confusion about the results they get.

Collaborator (author): Here the checkpoint fails because it has is_decoder=True and we need False to use the MLM head.

Collaborator: OK, thanks for explaining :-)
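
A minimal sketch of the incompatibility discussed above, for readers following the thread. It assumes the usual from_pretrained behavior of forwarding config kwargs (such as is_decoder=False) and is illustrative only; it is not part of this PR's changes:

```python
>>> from transformers import ReformerForMaskedLM

>>> # Per the discussion above, the real checkpoint is configured with is_decoder=True,
>>> # which ReformerForMaskedLM rejects (it needs bi-directional self-attention).
>>> # The flag can be overridden at load time, but the checkpoint has no pretrained MLM
>>> # head, so its predictions would not be meaningful for a doc example - hence the
>>> # tiny random checkpoint kept in the docstring below.
>>> model = ReformerForMaskedLM.from_pretrained(
...     "google/reformer-crime-and-punishment", is_decoder=False
... )  # the MLM head weights are newly initialized
```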


This example uses a false checkpoint since we don't have any available pretrained model for the masked language
modeling task with the Reformer architecture.

</Tip>

Example:

```python
>>> import torch
>>> from transformers import ReformerTokenizer, ReformerForMaskedLM
>>> from transformers import AutoTokenizer, ReformerForMaskedLM

>>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
>>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
>>> model = ReformerForMaskedLM.from_pretrained("hf-internal-testing/tiny-random-reformer")

>>> # add mask_token
@@ -2479,10 +2483,10 @@ def forward(

```python
>>> import torch
>>> from transformers import ReformerTokenizer, ReformerForSequenceClassification
>>> from transformers import AutoTokenizer, ReformerForSequenceClassification

>>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
>>> model = ReformerForSequenceClassification.from_pretrained("hf-internal-testing/tiny-random-reformer")
>>> tokenizer = AutoTokenizer.from_pretrained("google/reformer-crime-and-punishment")
>>> model = ReformerForSequenceClassification.from_pretrained("google/reformer-crime-and-punishment")

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

@@ -2491,59 +2495,20 @@

>>> predicted_class_id = logits.argmax().item()
>>> model.config.id2label[predicted_class_id]
'LABEL_1'
'LABEL_0'
```

```python
>>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
>>> num_labels = len(model.config.id2label)
>>> model = ReformerForSequenceClassification.from_pretrained(
... "hf-internal-testing/tiny-random-reformer", num_labels=num_labels
... "google/reformer-crime-and-punishment", num_labels=num_labels
... )

>>> labels = torch.tensor(1)
>>> loss = model(**inputs, labels=labels).loss
>>> round(loss.item(), 2)
0.69
```

Example of multi-label classification:

```python
>>> import torch
>>> from transformers import ReformerTokenizer, ReformerForSequenceClassification

>>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
>>> model = ReformerForSequenceClassification.from_pretrained(
... "hf-internal-testing/tiny-random-reformer", problem_type="multi_label_classification"
... )

>>> # add pad_token
>>> tokenizer.add_special_tokens({"pad_token": "[PAD]"}) # doctest: +IGNORE_RESULT
>>> inputs = tokenizer("Hello, my dog is cute", max_length=100, padding="max_length", return_tensors="pt")

>>> with torch.no_grad():
... logits = model(**inputs).logits

>>> predicted_class_id = logits.argmax().item()
>>> model.config.id2label[predicted_class_id]
'LABEL_1'
```

```python
>>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
>>> num_labels = len(model.config.id2label)
>>> model = ReformerForSequenceClassification.from_pretrained(
... "hf-internal-testing/tiny-random-reformer", num_labels=num_labels
... )
>>> model.train() # doctest: +IGNORE_RESULT

>>> num_labels = len(model.config.id2label)
>>> labels = torch.nn.functional.one_hot(torch.tensor([predicted_class_id]), num_classes=num_labels).to(
... torch.float
... )
>>> loss = model(**inputs, labels=labels).loss
>>> loss.backward() # doctest: +IGNORE_RESULT
0.68
```
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -2641,12 +2606,9 @@ def __init__(self, config):

@add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_TOKENIZER_FOR_DOC,
checkpoint="hf-internal-testing/tiny-random-reformer",
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=QuestionAnsweringModelOutput,
config_class=_CONFIG_FOR_DOC,
expected_output="''",
expected_loss=3.28,
)
def forward(
self,
9 changes: 0 additions & 9 deletions src/transformers/models/sew/modeling_sew.py
@@ -36,16 +36,11 @@

logger = logging.get_logger(__name__)

_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"


_HIDDEN_STATES_START_POSITION = 1


# General docstring
_CONFIG_FOR_DOC = "SEWConfig"
_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"

# Base docstring
_CHECKPOINT_FOR_DOC = "asapp/sew-tiny-100k-ft-ls100h"
@@ -58,7 +53,6 @@
_CTC_EXPECTED_LOSS = 0.42

# Audio class docstring
_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
_SEQ_CLASS_CHECKPOINT = "anton-l/sew-mid-100k-ft-keyword-spotting"
_SEQ_CLASS_EXPECTED_OUTPUT = "'_unknown_'"
_SEQ_CLASS_EXPECTED_LOSS = 9.52
@@ -916,7 +910,6 @@ def _mask_hidden_states(

@add_start_docstrings_to_model_forward(SEW_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_PROCESSOR_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=BaseModelOutput,
config_class=_CONFIG_FOR_DOC,
@@ -1020,7 +1013,6 @@ def freeze_feature_encoder(self):

@add_start_docstrings_to_model_forward(SEW_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_PROCESSOR_FOR_DOC,
checkpoint=_CHECKPOINT_FOR_DOC,
output_type=CausalLMOutput,
config_class=_CONFIG_FOR_DOC,
@@ -1155,7 +1147,6 @@ def freeze_base_model(self):

@add_start_docstrings_to_model_forward(SEW_INPUTS_DOCSTRING)
@add_code_sample_docstrings(
processor_class=_FEAT_EXTRACTOR_FOR_DOC,
checkpoint=_SEQ_CLASS_CHECKPOINT,
output_type=SequenceClassifierOutput,
config_class=_CONFIG_FOR_DOC,