diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py
index f0f31e87716e..59b199e26d27 100755
--- a/src/transformers/models/hubert/modeling_hubert.py
+++ b/src/transformers/models/hubert/modeling_hubert.py
@@ -41,14 +41,10 @@ logger = logging.get_logger(__name__)
 
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
-
-
 _HIDDEN_STATES_START_POSITION = 1
 
 # General docstring
 _CONFIG_FOR_DOC = "HubertConfig"
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
 
 # Base docstring
 _CHECKPOINT_FOR_DOC = "facebook/hubert-large-ls960-ft"
@@ -59,7 +55,6 @@ _CTC_EXPECTED_LOSS = 22.68
 
 # Audio class docstring
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
 _SEQ_CLASS_CHECKPOINT = "superb/hubert-base-superb-ks"
 _SEQ_CLASS_EXPECTED_OUTPUT = "'_unknown_'"
 _SEQ_CLASS_EXPECTED_LOSS = 8.53
@@ -1145,7 +1140,6 @@ def freeze_feature_encoder(self):
 
     @add_start_docstrings_to_model_forward(HUBERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=CausalLMOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1280,7 +1274,6 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(HUBERT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_SEQ_CLASS_CHECKPOINT,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py
index 137e99a67c01..606459067df5 100755
--- a/src/transformers/models/longformer/modeling_longformer.py
+++ b/src/transformers/models/longformer/modeling_longformer.py
@@ -41,7 +41,6 @@ _CHECKPOINT_FOR_DOC = "allenai/longformer-base-4096"
 _CONFIG_FOR_DOC = "LongformerConfig"
-_TOKENIZER_FOR_DOC = "LongformerTokenizer"
 
 LONGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "allenai/longformer-base-4096",
@@ -1903,7 +1902,6 @@ def __init__(self, config):
 
     @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
         checkpoint="jpwahle/longformer-base-plagiarism-detection",
         output_type=LongformerSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -2172,7 +2170,6 @@ def __init__(self, config):
 
     @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
         checkpoint="brad1141/Longformer-finetuned-norm",
         output_type=LongformerTokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -2260,7 +2257,6 @@ def __init__(self, config):
         LONGFORMER_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")
     )
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=LongformerMultipleChoiceModelOutput,
         config_class=_CONFIG_FOR_DOC,
diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py
index 7c929f353d30..b50231f659f9 100644
--- a/src/transformers/models/longformer/modeling_tf_longformer.py
+++ b/src/transformers/models/longformer/modeling_tf_longformer.py
@@ -2382,11 +2382,9 @@ def __init__(self, config, *inputs, **kwargs):
 
     @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
checkpoint="hf-internal-testing/tiny-random-longformer", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output="'LABEL_1'", - expected_loss=0.69, ) def call( self, @@ -2637,15 +2635,9 @@ def __init__(self, config, *inputs, **kwargs): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint="hf-internal-testing/tiny-random-longformer", + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output=( - "['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1'," - " 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1'," - " 'LABEL_1', 'LABEL_1']" - ), - expected_loss=0.59, ) def call( self, diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index 421393195e44..1cf3b00ce328 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -49,22 +49,10 @@ _CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25" _CONFIG_FOR_DOC = "MBartConfig" -_TOKENIZER_FOR_DOC = "MBartTokenizer" # Base model docstring _EXPECTED_OUTPUT_SHAPE = [1, 8, 1024] -# SequenceClassification docstring -_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-random-mbart" -_SEQ_CLASS_EXPECTED_LOSS = 0.69 -_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'" - -# QuestionAsnwering docstring -_CHECKPOINT_FOR_QA = "hf-internal-testing/tiny-random-mbart" -_QA_EXPECTED_LOSS = 3.55 -_QA_EXPECTED_OUTPUT = "'? Jim Henson was a'" - - MBART_PRETRAINED_MODEL_ARCHIVE_LIST = [ "facebook/mbart-large-cc25", # See all MBART models at https://huggingface.co/models?filter=mbart @@ -1187,7 +1175,6 @@ def get_decoder(self): @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_TOKENIZER_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, @@ -1467,12 +1454,9 @@ def __init__(self, config: MBartConfig, **kwargs): @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, - expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward def forward( @@ -1596,12 +1580,9 @@ def __init__(self, config): @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_QA, + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, - expected_loss=_QA_EXPECTED_LOSS, - expected_output=_QA_EXPECTED_OUTPUT, ) # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward def forward( diff --git a/src/transformers/models/plbart/modeling_plbart.py b/src/transformers/models/plbart/modeling_plbart.py index 1e2bc3bcc655..98017c48c31b 100644 --- a/src/transformers/models/plbart/modeling_plbart.py +++ b/src/transformers/models/plbart/modeling_plbart.py @@ -48,16 +48,6 @@ _CHECKPOINT_FOR_DOC 
= "uclanlp/plbart-base" _CONFIG_FOR_DOC = "PLBartConfig" -_TOKENIZER_FOR_DOC = "PLBartTokenizer" - -# Base model docstring -_EXPECTED_OUTPUT_SHAPE = [1, 8, 768] - -# SequenceClassification docstring -_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "hf-internal-testing/tiny-plbart" -_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'" -_SEQ_CLASS_EXPECTED_LOSS = 0.69 - PLBART_PRETRAINED_MODEL_ARCHIVE_LIST = [ "uclanlp/plbart-base", @@ -1161,7 +1151,6 @@ def get_decoder(self): @add_start_docstrings_to_model_forward(PLBART_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_TOKENIZER_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqModelOutput, config_class=_CONFIG_FOR_DOC, @@ -1438,12 +1427,9 @@ def __init__(self, config: PLBartConfig, **kwargs): @add_start_docstrings_to_model_forward(PLBART_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, + checkpoint=_CHECKPOINT_FOR_DOC, output_type=Seq2SeqSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, - expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward def forward( diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index 0cca45422eaf..3ceaf24a42e2 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ b/src/transformers/models/reformer/modeling_reformer.py @@ -49,7 +49,6 @@ _CHECKPOINT_FOR_DOC = "google/reformer-crime-and-punishment" _CONFIG_FOR_DOC = "ReformerConfig" -_TOKENIZER_FOR_DOC = "ReformerTokenizer" REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [ "google/reformer-crime-and-punishment", @@ -2009,7 +2008,6 @@ class PreTrainedModel @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_TOKENIZER_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=ReformerModelOutput, config_class=_CONFIG_FOR_DOC, @@ -2220,7 +2218,6 @@ def set_output_embeddings(self, new_embeddings): @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_TOKENIZER_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC, @@ -2360,13 +2357,20 @@ def forward( Returns: + + + This example uses a false checkpoint since we don't have any available pretrained model for the masked language + modeling task with the Reformer architecture. 
+
+        </Tip>
+
         Example:
 
         ```python
         >>> import torch
-        >>> from transformers import ReformerTokenizer, ReformerForMaskedLM
+        >>> from transformers import AutoTokenizer, ReformerForMaskedLM
 
-        >>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
+        >>> tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
         >>> model = ReformerForMaskedLM.from_pretrained("hf-internal-testing/tiny-random-reformer")
 
         >>> # add mask_token
@@ -2479,10 +2483,10 @@
 
         ```python
         >>> import torch
-        >>> from transformers import ReformerTokenizer, ReformerForSequenceClassification
+        >>> from transformers import AutoTokenizer, ReformerForSequenceClassification
 
-        >>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
-        >>> model = ReformerForSequenceClassification.from_pretrained("hf-internal-testing/tiny-random-reformer")
+        >>> tokenizer = AutoTokenizer.from_pretrained("google/reformer-crime-and-punishment")
+        >>> model = ReformerForSequenceClassification.from_pretrained("google/reformer-crime-and-punishment")
 
         >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
@@ -2491,59 +2495,20 @@
 
         >>> predicted_class_id = logits.argmax().item()
         >>> model.config.id2label[predicted_class_id]
-        'LABEL_1'
+        'LABEL_0'
         ```
 
         ```python
         >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
         >>> num_labels = len(model.config.id2label)
         >>> model = ReformerForSequenceClassification.from_pretrained(
-        ...     "hf-internal-testing/tiny-random-reformer", num_labels=num_labels
+        ...     "google/reformer-crime-and-punishment", num_labels=num_labels
         ... )
 
         >>> labels = torch.tensor(1)
         >>> loss = model(**inputs, labels=labels).loss
         >>> round(loss.item(), 2)
-        0.69
-        ```
-
-        Example of multi-label classification:
-
-        ```python
-        >>> import torch
-        >>> from transformers import ReformerTokenizer, ReformerForSequenceClassification
-
-        >>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
-        >>> model = ReformerForSequenceClassification.from_pretrained(
-        ...     "hf-internal-testing/tiny-random-reformer", problem_type="multi_label_classification"
-        ... )
-
-        >>> # add pad_token
-        >>> tokenizer.add_special_tokens({"pad_token": "[PAD]"})  # doctest: +IGNORE_RESULT
-        >>> inputs = tokenizer("Hello, my dog is cute", max_length=100, padding="max_length", return_tensors="pt")
-
-        >>> with torch.no_grad():
-        ...     logits = model(**inputs).logits
-
-        >>> predicted_class_id = logits.argmax().item()
-        >>> model.config.id2label[predicted_class_id]
-        'LABEL_1'
-        ```
-
-        ```python
-        >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
-        >>> num_labels = len(model.config.id2label)
-        >>> model = ReformerForSequenceClassification.from_pretrained(
-        ...     "hf-internal-testing/tiny-random-reformer", num_labels=num_labels
-        ... )
-        >>> model.train()  # doctest: +IGNORE_RESULT
-
-        >>> num_labels = len(model.config.id2label)
-        >>> labels = torch.nn.functional.one_hot(torch.tensor([predicted_class_id]), num_classes=num_labels).to(
-        ...     torch.float
-        ... )
-        >>> loss = model(**inputs, labels=labels).loss
-        >>> loss.backward()  # doctest: +IGNORE_RESULT
+        0.68
         ```
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -2641,12 +2606,9 @@ def __init__(self, config):
 
     @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint="hf-internal-testing/tiny-random-reformer",
+        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=QuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
-        expected_output="''",
-        expected_loss=3.28,
     )
     def forward(
         self,
diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py
index bed9e9fbdf0c..e358c75bb557 100644
--- a/src/transformers/models/sew/modeling_sew.py
+++ b/src/transformers/models/sew/modeling_sew.py
@@ -36,16 +36,11 @@ logger = logging.get_logger(__name__)
 
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
-
 _HIDDEN_STATES_START_POSITION = 1
 
-
 # General docstring
 _CONFIG_FOR_DOC = "SEWConfig"
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
 
 # Base docstring
 _CHECKPOINT_FOR_DOC = "asapp/sew-tiny-100k-ft-ls100h"
@@ -58,7 +53,6 @@ _CTC_EXPECTED_LOSS = 0.42
 
 # Audio class docstring
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
 _SEQ_CLASS_CHECKPOINT = "anton-l/sew-mid-100k-ft-keyword-spotting"
 _SEQ_CLASS_EXPECTED_OUTPUT = "'_unknown_'"
 _SEQ_CLASS_EXPECTED_LOSS = 9.52
@@ -916,7 +910,6 @@ def _mask_hidden_states(
 
     @add_start_docstrings_to_model_forward(SEW_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1020,7 +1013,6 @@ def freeze_feature_encoder(self):
 
     @add_start_docstrings_to_model_forward(SEW_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=CausalLMOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1155,7 +1147,6 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(SEW_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_SEQ_CLASS_CHECKPOINT,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
diff --git a/src/transformers/models/sew_d/modeling_sew_d.py b/src/transformers/models/sew_d/modeling_sew_d.py
index 804f78c26cdf..64c7b006363d 100644
--- a/src/transformers/models/sew_d/modeling_sew_d.py
+++ b/src/transformers/models/sew_d/modeling_sew_d.py
@@ -42,7 +42,6 @@
 
 # General docstring
 _CONFIG_FOR_DOC = "SEWDConfig"
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
 
 # Base docstring
 _CHECKPOINT_FOR_DOC = "asapp/sew-d-tiny-100k-ft-ls100h"
@@ -53,7 +52,6 @@ _CTC_EXPECTED_LOSS = 0.21
 
 # Audio class docstring
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
 _SEQ_CLASS_CHECKPOINT = "anton-l/sew-d-mid-400k-ft-keyword-spotting"
 _SEQ_CLASS_EXPECTED_OUTPUT = "'_unknown_'"
 _SEQ_CLASS_EXPECTED_LOSS = 3.16
@@ -1453,7 +1451,6 @@ def _mask_hidden_states(
 
     @add_start_docstrings_to_model_forward(SEWD_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1557,7 +1554,6 @@ def freeze_feature_encoder(self):
 
     @add_start_docstrings_to_model_forward(SEWD_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=CausalLMOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1692,7 +1688,6 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(SEWD_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_SEQ_CLASS_CHECKPOINT,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py
index 2949a0201963..11d7a30ad9c8 100755
--- a/src/transformers/models/unispeech/modeling_unispeech.py
+++ b/src/transformers/models/unispeech/modeling_unispeech.py
@@ -48,7 +48,6 @@
 
 # General docstring
 _CONFIG_FOR_DOC = "UniSpeechConfig"
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
 
 # Base docstring
 _CHECKPOINT_FOR_DOC = "patrickvonplaten/unispeech-large-1500h-cv-timit"
@@ -58,12 +57,6 @@ _CTC_EXPECTED_OUTPUT = "'mister quilter is the apposl of the midle classes and weare glad to welcom his gosepl'"
 _CTC_EXPECTED_LOSS = 17.17
 
-# Audio class docstring
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
-_SEQ_CLASS_CHECKPOINT = "hf-internal-testing/tiny-random-unispeech"
-_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_0'"  # TODO(anton) - could you quickly fine-tune a KS WavLM Model
-_SEQ_CLASS_EXPECTED_LOSS = 0.66  # TODO(anton) - could you quickly fine-tune a KS WavLM Model
-
 UNISPEECH_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "microsoft/unispeech-large-1500h-cv",
     "microsoft/unispeech-large-multi-lingual-1500h-cv",
@@ -1143,7 +1136,6 @@ def _mask_hidden_states(
 
     @add_start_docstrings_to_model_forward(UNISPEECH_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Wav2Vec2BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1286,12 +1278,9 @@ def forward(
 
         ```python
         >>> import torch
-        >>> from transformers import Wav2Vec2FeatureExtractor, UniSpeechForPreTraining
-        >>> from transformers.models.unispeech.modeling_unispeech import _compute_mask_indices
+        >>> from transformers import AutoFeatureExtractor, UniSpeechForPreTraining
 
-        >>> feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
-        ...     "hf-internal-testing/tiny-random-unispeech-sat"
-        ... )
+        >>> feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/unispeech-large-1500h-cv")
         >>> model = UniSpeechForPreTraining.from_pretrained("microsoft/unispeech-large-1500h-cv")
         >>> # TODO: Add full pretraining example
         ```"""
@@ -1395,7 +1384,6 @@ def freeze_feature_encoder(self):
 
     @add_start_docstrings_to_model_forward(UNISPEECH_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=CausalLMOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1482,7 +1470,6 @@ def forward(
     """,
     UNISPEECH_START_DOCSTRING,
 )
-# Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification with Wav2Vec2->UniSpeech, wav2vec2->unispeech, WAV_2_VEC_2->UNISPEECH
 class UniSpeechForSequenceClassification(UniSpeechPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
@@ -1501,6 +1488,7 @@ def __init__(self, config):
         # Initialize weights and apply final processing
         self.post_init()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_feature_extractor
     def freeze_feature_extractor(self):
         """
         Calling this function will disable the gradient computation for the feature encoder so that its parameters will
@@ -1513,6 +1501,7 @@ def freeze_feature_extractor(self):
         )
         self.freeze_feature_encoder()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_feature_encoder with wav2vec2->unispeech
     def freeze_feature_encoder(self):
         """
         Calling this function will disable the gradient computation for the feature encoder so that its parameter will
@@ -1520,6 +1509,7 @@ def freeze_feature_encoder(self):
         """
         self.unispeech.feature_extractor._freeze_parameters()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_base_model with wav2vec2->unispeech
     def freeze_base_model(self):
         """
         Calling this function will disable the gradient computation for the base model so that its parameters will not
@@ -1530,14 +1520,12 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(UNISPEECH_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
-        checkpoint=_SEQ_CLASS_CHECKPOINT,
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
         modality="audio",
-        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
-        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
     )
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.forward with Wav2Vec2->UniSpeech, wav2vec2->unispeech
     def forward(
         self,
         input_values: Optional[torch.Tensor],
diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
index d1b2074531ed..045da6231b5b 100755
--- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
+++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py
@@ -55,7 +55,6 @@
 
 # General docstring
 _CONFIG_FOR_DOC = "UniSpeechSatConfig"
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
 
 # Base docstring
 _CHECKPOINT_FOR_DOC = "microsoft/unispeech-sat-base-100h-libri-ft"
@@ -65,12 +64,6 @@ _CTC_EXPECTED_OUTPUT = "'MISTER QUILDER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL'"
 _CTC_EXPECTED_LOSS = 39.88
 
-# Audio class docstring
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
-_SEQ_CLASS_CHECKPOINT = "hf-internal-testing/tiny-random-unispeech-sat"
-_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_1'"  # TODO(anton) - could you quickly fine-tune a KS WavLM Model
-_SEQ_CLASS_EXPECTED_LOSS = 0.71  # TODO(anton) - could you quickly fine-tune a KS WavLM Model
-
 # Frame class docstring
 _FRAME_CLASS_CHECKPOINT = "microsoft/unispeech-sat-base-plus-sd"
 _FRAME_EXPECTED_OUTPUT = [0, 0]
@@ -1158,7 +1151,6 @@ def _mask_hidden_states(
 
     @add_start_docstrings_to_model_forward(UNISPEECH_SAT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Wav2Vec2BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1299,10 +1291,10 @@ def forward(
 
         ```python
         >>> import torch
-        >>> from transformers import Wav2Vec2FeatureExtractor, UniSpeechSatForPreTraining
+        >>> from transformers import AutoFeatureExtractor, UniSpeechSatForPreTraining
         >>> from transformers.models.unispeech_sat.modeling_unispeech_sat import _compute_mask_indices
 
-        >>> feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("microsoft/unispeech-sat-base")
+        >>> feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/unispeech-sat-base")
         >>> model = UniSpeechSatForPreTraining.from_pretrained("microsoft/unispeech-sat-base")
         >>> # TODO: Add full pretraining example
         ```"""
@@ -1399,7 +1391,6 @@ def freeze_feature_encoder(self):
 
     @add_start_docstrings_to_model_forward(UNISPEECH_SAT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=CausalLMOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1486,7 +1477,6 @@ def forward(
     """,
     UNISPEECH_SAT_START_DOCSTRING,
 )
-# Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification with Wav2Vec2->UniSpeechSat, wav2vec2->unispeech_sat, WAV_2_VEC_2->UNISPEECH_SAT
 class UniSpeechSatForSequenceClassification(UniSpeechSatPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
@@ -1505,6 +1495,7 @@ def __init__(self, config):
         # Initialize weights and apply final processing
         self.post_init()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_feature_extractor
     def freeze_feature_extractor(self):
         """
         Calling this function will disable the gradient computation for the feature encoder so that its parameters will
@@ -1517,6 +1508,7 @@ def freeze_feature_extractor(self):
         )
         self.freeze_feature_encoder()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_feature_encoder with wav2vec2->unispeech_sat
     def freeze_feature_encoder(self):
         """
         Calling this function will disable the gradient computation for the feature encoder so that its parameter will
@@ -1524,6 +1516,7 @@ def freeze_feature_encoder(self):
         """
         self.unispeech_sat.feature_extractor._freeze_parameters()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_base_model with wav2vec2->unispeech_sat
     def freeze_base_model(self):
         """
         Calling this function will disable the gradient computation for the base model so that its parameters will not
@@ -1534,14 +1527,12 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(UNISPEECH_SAT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
-        checkpoint=_SEQ_CLASS_CHECKPOINT,
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
         modality="audio",
-        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
-        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
     )
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.forward with Wav2Vec2->UniSpeechSat, wav2vec2->unispeech_sat
     def forward(
         self,
         input_values: Optional[torch.Tensor],
@@ -1658,7 +1649,6 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(UNISPEECH_SAT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_FRAME_CLASS_CHECKPOINT,
         output_type=TokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1841,7 +1831,6 @@ def _conv_out_length(input_length, kernel_size, stride):
 
     @add_start_docstrings_to_model_forward(UNISPEECH_SAT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_XVECTOR_CHECKPOINT,
         output_type=XVectorOutput,
         config_class=_CONFIG_FOR_DOC,
diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
index cb2aeb7562ef..2a88147e3219 100755
--- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py
@@ -56,7 +56,6 @@
 
 # General docstring
 _CONFIG_FOR_DOC = "Wav2Vec2Config"
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
 
 # Base docstring
 _CHECKPOINT_FOR_DOC = "facebook/wav2vec2-base-960h"
@@ -67,7 +66,6 @@ _CTC_EXPECTED_LOSS = 53.48
 
 # Audio class docstring
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
 _SEQ_CLASS_CHECKPOINT = "superb/wav2vec2-base-superb-ks"
 _SEQ_CLASS_EXPECTED_OUTPUT = "'_unknown_'"
 _SEQ_CLASS_EXPECTED_LOSS = 6.54
@@ -1279,7 +1277,6 @@ def _mask_hidden_states(
 
     @add_start_docstrings_to_model_forward(WAV_2_VEC_2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Wav2Vec2BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1655,7 +1652,6 @@ def freeze_feature_encoder(self):
 
     @add_start_docstrings_to_model_forward(WAV_2_VEC_2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=CausalLMOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1789,7 +1785,6 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(WAV_2_VEC_2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_SEQ_CLASS_CHECKPOINT,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1911,7 +1906,6 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(WAV_2_VEC_2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_FRAME_CLASS_CHECKPOINT,
         output_type=TokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -2091,7 +2085,6 @@ def _conv_out_length(input_length, kernel_size, stride):
 
     @add_start_docstrings_to_model_forward(WAV_2_VEC_2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_XVECTOR_CHECKPOINT,
         output_type=XVectorOutput,
         config_class=_CONFIG_FOR_DOC,
diff --git a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
index d72522d294be..5649f4c81ed3 100644
--- a/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
+++ b/src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py
@@ -54,7 +54,6 @@
 
 # General docstring
 _CONFIG_FOR_DOC = "Wav2Vec2ConformerConfig"
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
 
 # Base docstring
"facebook/wav2vec2-conformer-rope-large-960h-ft" @@ -64,20 +63,6 @@ _CTC_EXPECTED_OUTPUT = "'MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL'" _CTC_EXPECTED_LOSS = 64.21 -# Audio class docstring -_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor" -_SEQ_CLASS_CHECKPOINT = "hf-internal-testing/wav2vec2-conformer-seq-class" -_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_0'" -_SEQ_CLASS_EXPECTED_LOSS = 0.68 - -# Frame class docstring -_FRAME_CLASS_CHECKPOINT = "hf-internal-testing/wav2vec2-conformer-frame-class" -_FRAME_EXPECTED_OUTPUT = [1, 0] - -# Speaker Verification docstring -_XVECTOR_CHECKPOINT = "hf-internal-testing/wav2vec2-conformer-xvector" -_XVECTOR_EXPECTED_OUTPUT = 1.0 - WAV2VEC2_CONFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [ "facebook/wav2vec2-conformer-rel-pos-large", @@ -1324,7 +1309,6 @@ def _mask_hidden_states( @add_start_docstrings_to_model_forward(WAV2VEC2_CONFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_PROCESSOR_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=Wav2Vec2BaseModelOutput, config_class=_CONFIG_FOR_DOC, @@ -1643,7 +1627,6 @@ def freeze_feature_encoder(self): @add_start_docstrings_to_model_forward(WAV2VEC2_CONFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_PROCESSOR_FOR_DOC, checkpoint=_CHECKPOINT_FOR_DOC, output_type=CausalLMOutput, config_class=_CONFIG_FOR_DOC, @@ -1769,13 +1752,10 @@ def freeze_base_model(self): @add_start_docstrings_to_model_forward(WAV2VEC2_CONFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_FEAT_EXTRACTOR_FOR_DOC, - checkpoint=_SEQ_CLASS_CHECKPOINT, + checkpoint=_CHECKPOINT_FOR_DOC, output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, modality="audio", - expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, - expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.forward with Wav2Vec2->Wav2Vec2Conformer,wav2vec2->wav2vec2_conformer,WAV_2_VEC_2->WAV2VEC2_CONFORMER def forward( @@ -1884,12 +1864,10 @@ def freeze_base_model(self): @add_start_docstrings_to_model_forward(WAV2VEC2_CONFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_FEAT_EXTRACTOR_FOR_DOC, - checkpoint=_FRAME_CLASS_CHECKPOINT, + checkpoint=_CHECKPOINT_FOR_DOC, output_type=TokenClassifierOutput, config_class=_CONFIG_FOR_DOC, modality="audio", - expected_output=_FRAME_EXPECTED_OUTPUT, ) # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForAudioFrameClassification.forward with wav2vec2->wav2vec2_conformer def forward( @@ -2058,12 +2036,10 @@ def _conv_out_length(input_length, kernel_size, stride): @add_start_docstrings_to_model_forward(WAV2VEC2_CONFORMER_INPUTS_DOCSTRING) @add_code_sample_docstrings( - processor_class=_FEAT_EXTRACTOR_FOR_DOC, - checkpoint=_XVECTOR_CHECKPOINT, + checkpoint=_CHECKPOINT_FOR_DOC, output_type=XVectorOutput, config_class=_CONFIG_FOR_DOC, modality="audio", - expected_output=_XVECTOR_EXPECTED_OUTPUT, ) # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForXVector.forward with Wav2Vec2->Wav2Vec2Conformer,wav2vec2->wav2vec2_conformer,WAV_2_VEC_2->WAV2VEC2_CONFORMER def forward( diff --git a/src/transformers/models/wavlm/modeling_wavlm.py b/src/transformers/models/wavlm/modeling_wavlm.py index 2d77c6a33e37..30cec09efac5 100755 --- a/src/transformers/models/wavlm/modeling_wavlm.py +++ b/src/transformers/models/wavlm/modeling_wavlm.py @@ -48,7 +48,6 @@ # General docstring _CONFIG_FOR_DOC = "WavLMConfig" 
-_PROCESSOR_FOR_DOC = "Wav2Vec2Processor"
 
 # Base docstring
 _CHECKPOINT_FOR_DOC = "patrickvonplaten/wavlm-libri-clean-100h-base-plus"
@@ -58,12 +57,6 @@ _CTC_EXPECTED_OUTPUT = "'mister quilter is the aposle of the middle classes and we are glad to welcome his gospel'"
 _CTC_EXPECTED_LOSS = 12.51
 
-# Audio class docstring
-_FEAT_EXTRACTOR_FOR_DOC = "Wav2Vec2FeatureExtractor"
-_SEQ_CLASS_CHECKPOINT = "hf-internal-testing/tiny-random-wavlm"
-_SEQ_CLASS_EXPECTED_OUTPUT = "'no'"  # TODO(anton) - could you quickly fine-tune a KS WavLM Model
-_SEQ_CLASS_EXPECTED_LOSS = 0.7  # TODO(anton) - could you quickly fine-tune a KS WavLM Model
-
 # Frame class docstring
 _FRAME_CLASS_CHECKPOINT = "microsoft/wavlm-base-plus-sd"
 _FRAME_EXPECTED_OUTPUT = [0, 0]
@@ -1212,7 +1205,6 @@ def _mask_hidden_states(
 
     @add_start_docstrings_to_model_forward(WAVLM_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Wav2Vec2BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1320,7 +1312,6 @@ def freeze_feature_encoder(self):
 
     @add_start_docstrings_to_model_forward(WAVLM_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_PROCESSOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=CausalLMOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1407,7 +1398,6 @@ def forward(
     """,
     WAVLM_START_DOCSTRING,
 )
-# Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification with Wav2Vec2->WavLM, wav2vec2->wavlm, WAV_2_VEC_2->WAVLM
 class WavLMForSequenceClassification(WavLMPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
@@ -1426,6 +1416,7 @@ def __init__(self, config):
         # Initialize weights and apply final processing
         self.post_init()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_feature_extractor
     def freeze_feature_extractor(self):
         """
         Calling this function will disable the gradient computation for the feature encoder so that its parameters will
@@ -1438,6 +1429,7 @@ def freeze_feature_extractor(self):
         )
         self.freeze_feature_encoder()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_feature_encoder with wav2vec2->wavlm
     def freeze_feature_encoder(self):
         """
         Calling this function will disable the gradient computation for the feature encoder so that its parameter will
@@ -1445,6 +1437,7 @@ def freeze_feature_encoder(self):
         """
         self.wavlm.feature_extractor._freeze_parameters()
 
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.freeze_base_model with wav2vec2->wavlm
     def freeze_base_model(self):
         """
         Calling this function will disable the gradient computation for the base model so that its parameters will not
@@ -1455,14 +1448,12 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(WAVLM_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
-        checkpoint=_SEQ_CLASS_CHECKPOINT,
+        checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=SequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
         modality="audio",
-        expected_output=_SEQ_CLASS_EXPECTED_OUTPUT,
-        expected_loss=_SEQ_CLASS_EXPECTED_LOSS,
     )
+    # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2ForSequenceClassification.forward with Wav2Vec2->WavLM, wav2vec2->wavlm
     def forward(
         self,
         input_values: Optional[torch.Tensor],
@@ -1578,7 +1569,6 @@ def freeze_base_model(self):
 
     @add_start_docstrings_to_model_forward(WAVLM_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_FRAME_CLASS_CHECKPOINT,
         output_type=TokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1761,7 +1751,6 @@ def _conv_out_length(input_length, kernel_size, stride):
 
     @add_start_docstrings_to_model_forward(WAVLM_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_XVECTOR_CHECKPOINT,
         output_type=XVectorOutput,
         config_class=_CONFIG_FOR_DOC,
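
All of the touched docstrings converge on the same user-facing pattern: preprocessors are loaded through the `Auto*` factory classes instead of hard-coded, model-specific classes such as `ReformerTokenizer` or `Wav2Vec2FeatureExtractor`, so the generated code samples no longer need a `processor_class` argument. A minimal sketch of that pattern, reusing a checkpoint already referenced in this diff (any checkpoint id would work the same way):

```python
from transformers import AutoTokenizer, ReformerForMaskedLM

# AutoTokenizer reads the checkpoint's config and instantiates the matching
# tokenizer class, so examples stay valid even if the tokenizer class changes.
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
model = ReformerForMaskedLM.from_pretrained("hf-internal-testing/tiny-random-reformer")
```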