Rework automatic code samples in docstrings #20757

ydshieh · 2022-12-13T18:01:52Z

Not sure why this is not added, say, as the last argument. Same question for the occurrences below.

Because the code sample used by this model no longer contains a `processor_class`. It is only in the base model now, and I have left it there in case it is used by other modalities.

-Original file line number
+Diff line change
@@ Expand Up / @@ -56,7 +56,6 @@ @@
     _CHECKPOINT_FOR_DOC = "bert-base-uncased"
     _CONFIG_FOR_DOC = "BertConfig"
-    _TOKENIZER_FOR_DOC = "BertTokenizer"
     # TokenClassification docstring
     _CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbmdz/bert-large-cased-finetuned-conll03-english"
@@ Expand Down Expand Up / @@ -911,7 +910,6 @@ class PreTrainedModel @@
         @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=BaseModelOutputWithPoolingAndCrossAttentions,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down Expand Up / @@ -1184,7 +1182,6 @@ def set_output_embeddings(self, new_embeddings): @@
         @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=CausalLMOutputWithCrossAttentions,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down Expand Up / @@ -1327,7 +1324,6 @@ def set_output_embeddings(self, new_embeddings): @@
         @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=MaskedLMOutput,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down Expand Up / @@ -1535,7 +1531,6 @@ def __init__(self, config): @@
         @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
             output_type=SequenceClassifierOutput,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down Expand Up / @@ -1637,7 +1632,6 @@ def __init__(self, config): @@
         @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=MultipleChoiceModelOutput,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down Expand Up / @@ -1736,7 +1730,6 @@ def __init__(self, config): @@
         @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
             output_type=TokenClassifierOutput,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down Expand Up / @@ -1819,7 +1812,6 @@ def __init__(self, config): @@
         @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_QA,
             output_type=QuestionAnsweringModelOutput,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -51,21 +51,8 @@ @@
     _CHECKPOINT_FOR_DOC = "google/bigbird-pegasus-large-arxiv"
     _CONFIG_FOR_DOC = "BigBirdPegasusConfig"
-    _TOKENIZER_FOR_DOC = "PegasusTokenizerFast"
-    # Base model docstring
     _EXPECTED_OUTPUT_SHAPE = [1, 7, 1024]
-    # SequenceClassification docstring
-    _CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-bigbird_pegasus"
-    _SEQ_CLASS_EXPECTED_LOSS = 0.69
-    _SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
-    # QuestionAsnwering docstring
-    _CHECKPOINT_FOR_QA = "hf-internal-testing/tiny-random-bigbird_pegasus"
-    _QA_EXPECTED_LOSS = 3.96
-    _QA_EXPECTED_OUTPUT = "''"
     BIGBIRD_PEGASUS_PRETRAINED_MODEL_ARCHIVE_LIST = [
         "google/bigbird-pegasus-large-arxiv",
@@ Expand Down Expand Up / @@ -2355,7 +2342,6 @@ def custom_forward(*inputs): @@
         "The bare BigBirdPegasus Model outputting raw hidden-states without any specific head on top.",
         BIGBIRD_PEGASUS_START_DOCSTRING,
     )
-    # Copied from transformers.models.bart.modeling_bart.BartModel with Bart->BigBirdPegasus, BART->BIGBIRD_PEGASUS
     class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel):
         _keys_to_ignore_on_load_missing = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight"]
@@ Expand Down Expand Up / @@ -2387,12 +2373,12 @@ def get_decoder(self): @@
         @add_start_docstrings_to_model_forward(BIGBIRD_PEGASUS_INPUTS_DOCSTRING)
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=Seq2SeqModelOutput,
             config_class=_CONFIG_FOR_DOC,
             expected_output=_EXPECTED_OUTPUT_SHAPE,
         )
+        # Copied from transformers.models.bart.modeling_bart.BartModel.forward with Bart->BigBirdPegasus
         def forward(
             self,
             input_ids: torch.LongTensor = None,
@@ Expand Down Expand Up / @@ -2663,7 +2649,6 @@ def _reorder_cache(past, beam_idx): @@
         """,
         BIGBIRD_PEGASUS_START_DOCSTRING,
     )
-    # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification with Bart->BigBirdPegasus, BART->BIGBIRD_PEGASUS
     class BigBirdPegasusForSequenceClassification(BigBirdPegasusPreTrainedModel):
         _keys_to_ignore_on_load_missing = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight"]
@@ Expand All @@
         @add_start_docstrings_to_model_forward(BIGBIRD_PEGASUS_INPUTS_DOCSTRING)
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
-            checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
+            checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=Seq2SeqSequenceClassifierOutput,
             config_class=_CONFIG_FOR_DOC,
-            expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
-            expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
         )
+        # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward
         def forward(
             self,
             input_ids: torch.LongTensor = None,
@@ Expand Down Expand Up / @@ -2793,7 +2776,6 @@ def forward( @@
         """,
         BIGBIRD_PEGASUS_START_DOCSTRING,
     )
-    # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering with Bart->BigBirdPegasus, BART->BIGBIRD_PEGASUS
     class BigBirdPegasusForQuestionAnswering(BigBirdPegasusPreTrainedModel):
         _keys_to_ignore_on_load_missing = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight"]
@@ Expand All / @@ -2810,13 +2792,11 @@ def __init__(self, config): @@
         @add_start_docstrings_to_model_forward(BIGBIRD_PEGASUS_INPUTS_DOCSTRING)
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
-            checkpoint=_CHECKPOINT_FOR_QA,
+            checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=Seq2SeqQuestionAnsweringModelOutput,
             config_class=_CONFIG_FOR_DOC,
-            expected_loss=_QA_EXPECTED_LOSS,
-            expected_output=_QA_EXPECTED_OUTPUT,
         )
+        # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward
         def forward(
             self,
             input_ids: torch.Tensor = None,
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -47,7 +47,6 @@ @@
     # General docstring
     _CONFIG_FOR_DOC = "Data2VecAudioConfig"
-    _PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
     # Base docstring
     _CHECKPOINT_FOR_DOC = "facebook/data2vec-audio-base-960h"
@@ Expand All / @@ -57,20 +56,6 @@ @@
     _CTC_EXPECTED_OUTPUT = "'MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL'"
     _CTC_EXPECTED_LOSS = 66.95
-    # Audio class docstring
-    _FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
-    _SEQ_CLASS_CHECKPOINT = "hf-internal-testing/tiny-random-data2vec-seq-class"
-    _SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"
-    _SEQ_CLASS_EXPECTED_LOSS = 0.69
-    # Frame class docstring
-    _FRAME_CLASS_CHECKPOINT = "hf-internal-testing/tiny-random-data2vec-audio-frame"
-    _FRAME_EXPECTED_OUTPUT = [1, 1]
-    # Speaker Verification docstring
-    _XVECTOR_CHECKPOINT = "hf-internal-testing/tiny-random-data2vec-xvector"
-    _XVECTOR_EXPECTED_OUTPUT = 1.0
     DATA2VEC_AUDIO_PRETRAINED_MODEL_ARCHIVE_LIST = [
         "facebook/data2vec-audio-base",
@@ Expand Down Expand Up / @@ -917,12 +902,12 @@ def _mask_hidden_states( @@
         @add_start_docstrings_to_model_forward(DATA2VEC_AUDIO_INPUTS_DOCSTRING)
         @add_code_sample_docstrings(
-            processor_class=_PROCESSOR_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=Wav2Vec2BaseModelOutput,
             config_class=_CONFIG_FOR_DOC,
             modality="audio",
             expected_output=_EXPECTED_OUTPUT_SHAPE,
+            processor_class="AutoProcessor",
         )
         def forward(
             self,
@@ Expand Down Expand Up / @@ -981,7 +966,6 @@ def forward( @@
         """Data2VecAudio Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).""",
         DATA2VEC_AUDIO_START_DOCSTRING,
     )
-    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForCTC with Wav2Vec2->Data2VecAudio, wav2vec2->data2vec_audio, WAV_2_VEC_2->DATA2VEC_AUDIO
     class Data2VecAudioForCTC(Data2VecAudioPreTrainedModel):
         def __init__(self, config):
             super().__init__(config)
@@ Expand Down Expand Up / @@ -1025,13 +1009,13 @@ def freeze_feature_encoder(self): @@
         @add_start_docstrings_to_model_forward(DATA2VEC_AUDIO_INPUTS_DOCSTRING)
         @add_code_sample_docstrings(
-            processor_class=_PROCESSOR_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=CausalLMOutput,
             config_class=_CONFIG_FOR_DOC,
             expected_output=_CTC_EXPECTED_OUTPUT,
             expected_loss=_CTC_EXPECTED_LOSS,
         )
+        # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForCTC.forward with wav2vec2->data2vec_audio
         def forward(
             self,
             input_values: Optional[torch.Tensor],
@@ Expand Down Expand Up / @@ -1112,7 +1096,6 @@ def forward( @@
         """,
         DATA2VEC_AUDIO_START_DOCSTRING,
     )
-    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification with Wav2Vec2->Data2VecAudio, wav2vec2->data2vec_audio, WAV_2_VEC_2->DATA2VEC_AUDIO
     class Data2VecAudioForSequenceClassification(Data2VecAudioPreTrainedModel):
         def __init__(self, config):
             super().__init__(config)
@@ Expand Down Expand Up / @@ -1160,14 +1143,12 @@ def freeze_base_model(self): @@
         @add_start_docstrings_to_model_forward(DATA2VEC_AUDIO_INPUTS_DOCSTRING)
         @add_code_sample_docstrings(
-            processor_class=_FEAT_EXTRACTOR_FOR_DOC,
-            checkpoint=_SEQ_CLASS_CHECKPOINT,
+            checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=SequenceClassifierOutput,
             config_class=_CONFIG_FOR_DOC,
             modality="audio",
-            expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
-            expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
         )
+        # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.forward with wav2vec2->data2vec_audio
         def forward(
             self,
             input_values: Optional[torch.Tensor],
@@ Expand Down Expand Up / @@ -1236,7 +1217,6 @@ def forward( @@
         """,
         DATA2VEC_AUDIO_START_DOCSTRING,
     )
-    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForAudioFrameClassification with Wav2Vec2->Data2VecAudio, wav2vec2->data2vec_audio, WAV_2_VEC_2->DATA2VEC_AUDIO
     class Data2VecAudioForAudioFrameClassification(Data2VecAudioPreTrainedModel):
         def __init__(self, config):
             super().__init__(config)
@@ Expand Down Expand Up / @@ -1284,13 +1264,12 @@ def freeze_base_model(self): @@
         @add_start_docstrings_to_model_forward(DATA2VEC_AUDIO_INPUTS_DOCSTRING)
         @add_code_sample_docstrings(
-            processor_class=_FEAT_EXTRACTOR_FOR_DOC,
-            checkpoint=_FRAME_CLASS_CHECKPOINT,
+            checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=TokenClassifierOutput,
             config_class=_CONFIG_FOR_DOC,
             modality="audio",
-            expected_output=_FRAME_EXPECTED_OUTPUT,
         )
+        # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForAudioFrameClassification.forward with wav2vec2->data2vec_audio
         def forward(
             self,
             input_values: Optional[torch.Tensor],
@@ Expand Down Expand Up / @@ -1402,7 +1381,6 @@ def forward(self, hidden_states): @@
         """,
         DATA2VEC_AUDIO_START_DOCSTRING,
     )
-    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForXVector with Wav2Vec2->Data2VecAudio, wav2vec2->data2vec_audio, WAV_2_VEC_2->DATA2VEC_AUDIO
     class Data2VecAudioForXVector(Data2VecAudioPreTrainedModel):
         def __init__(self, config):
             super().__init__(config)
@@ Expand Down Expand Up @@
         @add_start_docstrings_to_model_forward(DATA2VEC_AUDIO_INPUTS_DOCSTRING)
         @add_code_sample_docstrings(
-            processor_class=_FEAT_EXTRACTOR_FOR_DOC,
-            checkpoint=_XVECTOR_CHECKPOINT,
+            checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=XVectorOutput,
             config_class=_CONFIG_FOR_DOC,
             modality="audio",
-            expected_output=_XVECTOR_EXPECTED_OUTPUT,
         )
+        # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForXVector.forward with wav2vec2->data2vec_audio
         def forward(
             self,
             input_values: Optional[torch.Tensor],
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -38,34 +38,20 @@ @@
     logger = logging.get_logger(__name__)
     _CONFIG_FOR_DOC = "DebertaConfig"
-    _TOKENIZER_FOR_DOC = "DebertaTokenizer"
     _CHECKPOINT_FOR_DOC = "microsoft/deberta-base"
     # Masked LM docstring
     _CHECKPOINT_FOR_MASKED_LM = "lsanochkin/deberta-large-feedback"
     _MASKED_LM_EXPECTED_OUTPUT = "' Paris'"
     _MASKED_LM_EXPECTED_LOSS = "0.54"
-    # TokenClassification docstring
-    _CHECKPOINT_FOR_TOKEN_CLASSIFICATION = "dbsamu/deberta-base-finetuned-ner"
-    _TOKEN_CLASS_EXPECTED_OUTPUT = (
-        "['LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0',"
-        " 'LABEL_0', 'LABEL_0']"
-    )
-    _TOKEN_CLASS_EXPECTED_LOSS = 0.04
     # QuestionAnswering docstring
     _CHECKPOINT_FOR_QA = "Palak/microsoft_deberta-large_squad"
     _QA_EXPECTED_OUTPUT = "' a nice puppet'"
     _QA_EXPECTED_LOSS = 0.14
     _QA_TARGET_START_INDEX = 12
     _QA_TARGET_END_INDEX = 14
-    # SequenceClassification docstring
-    _CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-deberta"
-    _SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_0'"
-    _SEQ_CLASS_EXPECTED_LOSS = "0.69"
     DEBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
         "microsoft/deberta-base",
@@ Expand Down Expand Up / @@ -950,7 +936,6 @@ class PreTrainedModel @@
         @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=BaseModelOutput,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down Expand Up / @@ -1057,7 +1042,6 @@ def set_output_embeddings(self, new_embeddings): @@
         @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_MASKED_LM,
             output_type=MaskedLMOutput,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down Expand Up / @@ -1201,12 +1185,9 @@ def set_input_embeddings(self, new_embeddings): @@
         @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
-            checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION,
+            checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=SequenceClassifierOutput,
             config_class=_CONFIG_FOR_DOC,
-            expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
-            expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
         )
         def forward(
             self,
@@ Expand Down Expand Up / @@ -1311,12 +1292,9 @@ def __init__(self, config): @@
         @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
-            checkpoint=_CHECKPOINT_FOR_TOKEN_CLASSIFICATION,
+            checkpoint=_CHECKPOINT_FOR_DOC,
             output_type=TokenClassifierOutput,
             config_class=_CONFIG_FOR_DOC,
-            expected_output=_TOKEN_CLASS_EXPECTED_OUTPUT,
-            expected_loss=_TOKEN_CLASS_EXPECTED_LOSS,
         )
         def forward(
             self,
@@ Expand Down Expand Up / @@ -1388,7 +1366,6 @@ def __init__(self, config): @@
         @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
         @add_code_sample_docstrings(
-            processor_class=_TOKENIZER_FOR_DOC,
             checkpoint=_CHECKPOINT_FOR_QA,
             output_type=QuestionAnsweringModelOutput,
             config_class=_CONFIG_FOR_DOC,
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Rework automatic code samples in docstrings #20757

Uh oh!

Diff view

Diff view

There are no files selected for viewing

ydshieh Dec 13, 2022

Uh oh!

sgugger Dec 13, 2022

Uh oh!

Uh oh!

Rework automatic code samples in docstrings #20757

Uh oh!

Rework automatic code samples in docstrings #20757

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

ydshieh Dec 13, 2022

Choose a reason for hiding this comment

Uh oh!

sgugger Dec 13, 2022

Choose a reason for hiding this comment

Uh oh!

Uh oh!