From e27ab7c7e015ef731f40f65620b826f3addf5c6f Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Tue, 8 Mar 2022 14:44:49 +0100
Subject: [PATCH 01/12] Move generate docs

---
 docs/source/_toctree.yml                     |  2 ++
 docs/source/main_classes/model.mdx           |  8 -------
 docs/source/main_classes/text_generation.mdx | 23 ++++++++++++++++++++
 3 files changed, 25 insertions(+), 8 deletions(-)
 create mode 100644 docs/source/main_classes/text_generation.mdx

diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 0415f942cf14..aacd9a5c101d 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -114,6 +114,8 @@
     title: Logging
   - local: main_classes/model
     title: Models
+  - local: main_classes/text_generationgenerate
+    title: Text Generation
   - local: main_classes/onnx
     title: ONNX
   - local: main_classes/optimizer_schedules
diff --git a/docs/source/main_classes/model.mdx b/docs/source/main_classes/model.mdx
index d65ae8516e32..4da5e72b7ed1 100644
--- a/docs/source/main_classes/model.mdx
+++ b/docs/source/main_classes/model.mdx
@@ -86,14 +86,6 @@ Due to Pytorch design, this functionality is only available for floating dtypes
     - push_to_hub
     - all
 
-## Generation
-
-[[autodoc]] generation_utils.GenerationMixin
-
-[[autodoc]] generation_tf_utils.TFGenerationMixin
-
-[[autodoc]] generation_flax_utils.FlaxGenerationMixin
-
 ## Pushing to the Hub
 
 [[autodoc]] file_utils.PushToHubMixin
diff --git a/docs/source/main_classes/text_generation.mdx b/docs/source/main_classes/text_generation.mdx
new file mode 100644
index 000000000000..a599297565e2
--- /dev/null
+++ b/docs/source/main_classes/text_generation.mdx
@@ -0,0 +1,23 @@
+<!--Copyright 2022 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
+# Generation
+
+Methods for auto-regressive text generation, namely [`~generation_utils.GenerationMixin.generate`] (for the PyTorch models), [`~generation_tf_utils.TFGenerationMixin.generate`] (for the TensorFlow models) and [`~generation_flax_utils.FlaxGenerationMixin.generate`] (for the Flax/JAX models), are defined in [`~generation_utils.GenerationMixin`], [`~generation_tf_utils.TFGenerationMixin`] and [`~generation_flax_utils.FlaxGenerationMixin`] respectively.
+The *Mixin* classes are exposed in the base model classes [`PreTrainedModel`], [`TFPreTrainedModel`], and [`FlaxPreTrainedModel`] respectively, so that every model class
+has access to `generate` functions.
+
+[[autodoc]] generation_utils.GenerationMixin
+
+[[autodoc]] generation_tf_utils.TFGenerationMixin
+
+[[autodoc]] generation_flax_utils.FlaxGenerationMixin

From c956a0a609a9447a2e124e8bff4526e86fb1b2c9 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Tue, 8 Mar 2022 14:45:41 +0100
Subject: [PATCH 02/12] up

---
 docs/source/main_classes/text_generation.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/main_classes/text_generation.mdx b/docs/source/main_classes/text_generation.mdx
index a599297565e2..ea39579fe7b9 100644
--- a/docs/source/main_classes/text_generation.mdx
+++ b/docs/source/main_classes/text_generation.mdx
@@ -14,7 +14,7 @@ specific language governing permissions and limitations under the License.
 
 Methods for auto-regressive text generation, namely [`~generation_utils.GenerationMixin.generate`] (for the PyTorch models), [`~generation_tf_utils.TFGenerationMixin.generate`] (for the TensorFlow models) and [`~generation_flax_utils.FlaxGenerationMixin.generate`] (for the Flax/JAX models), are defined in [`~generation_utils.GenerationMixin`], [`~generation_tf_utils.TFGenerationMixin`] and [`~generation_flax_utils.FlaxGenerationMixin`] respectively.
The *Mixin* classes are exposed in the base model classes [`PreTrainedModel`], [`TFPreTrainedModel`], and [`FlaxPreTrainedModel`] respectively, so that every model class -has access to `generate` functions. +has access to the `generate` functions. [[autodoc]] generation_utils.GenerationMixin From 96dfd64f8559264441a379db20fcc1b98dce6437 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 8 Mar 2022 14:46:08 +0100 Subject: [PATCH 03/12] Update docs/source/_toctree.yml --- docs/source/_toctree.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index aacd9a5c101d..23e9a6fdcb99 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -114,7 +114,7 @@ title: Logging - local: main_classes/model title: Models - - local: main_classes/text_generationgenerate + - local: main_classes/text_generation title: Text Generation - local: main_classes/onnx title: ONNX From b1fad4cd781011faac62edf8e4fccede08ce8d06 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Mar 2022 12:26:18 +0100 Subject: [PATCH 04/12] correct --- docs/source/main_classes/text_generation.mdx | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/source/main_classes/text_generation.mdx b/docs/source/main_classes/text_generation.mdx index ea39579fe7b9..ae5061003f87 100644 --- a/docs/source/main_classes/text_generation.mdx +++ b/docs/source/main_classes/text_generation.mdx @@ -12,12 +12,22 @@ specific language governing permissions and limitations under the License. # Generation -Methods for auto-regressive text generation, namely [`~generation_utils.GenerationMixin.generate`] (for the PyTorch models), [`~generation_tf_utils.TFGenerationMixin.generate`] (for the TensorFlow models) and [`~generation_flax_utils.FlaxGenerationMixin.generate`] (for the Flax/JAX models), are defined in [`~generation_utils.GenerationMixin`], [`~generation_tf_utils.TFGenerationMixin`] and [`~generation_flax_utils.FlaxGenerationMixin`] respectively. -The *Mixin* classes are exposed in the base model classes [`PreTrainedModel`], [`TFPreTrainedModel`], and [`FlaxPreTrainedModel`] respectively, so that every model class -has access to the `generate` functions. +The methods for auto-regressive text generation, namely [`~generation_utils.GenerationMixin.generate`] (for the PyTorch models), [`~generation_tf_utils.TFGenerationMixin.generate`] (for the TensorFlow models) and [`~generation_flax_utils.FlaxGenerationMixin.generate`] (for the Flax/JAX models), are implemented in [`~generation_utils.GenerationMixin`], [`~generation_tf_utils.TFGenerationMixin`] and [`~generation_flax_utils.FlaxGenerationMixin`] respectively. + +The `GenerationMixin` classes are inherited by the corresponding base model classes, *e.g.* [`PreTrainedModel`], [`TFPreTrainedModel`], and [`FlaxPreTrainedModel`] respectively, therefore exposing all +methods for auto-regressive text generation to every model class. 
+
+## GenerationMixin
 
 [[autodoc]] generation_utils.GenerationMixin
+    - generate
+
+## TFGenerationMixin
 
 [[autodoc]] generation_tf_utils.TFGenerationMixin
+    - generate
+
+## FlaxGenerationMixin
 
 [[autodoc]] generation_flax_utils.FlaxGenerationMixin
+    - generate

From c96e15c617482cba7f5540cf77df515c8225245a Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Wed, 9 Mar 2022 15:20:24 +0100
Subject: [PATCH 05/12] correct some stuff

---
 src/transformers/generation_utils.py | 125 ++++++++++++++-------------
 1 file changed, 63 insertions(+), 62 deletions(-)

diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py
index d9a901d201d9..0a3093f41bac 100644
--- a/src/transformers/generation_utils.py
+++ b/src/transformers/generation_utils.py
@@ -377,7 +377,14 @@ class BeamSampleEncoderDecoderOutput(ModelOutput):
 class GenerationMixin:
     """
-    A class containing all of the functions supporting generation, to be used as a mixin in [`PreTrainedModel`].
+    A class containing all functions for auto-regressive text generation, to be used as a mixin in [`PreTrainedModel`].
+
+    The class exposes the [`~generation_utils.GenerationMixin.generate`], which can be used for:
+    - *greedy decoding* by calling [`~generation_utils.GenerationMixin.greedy_search`] if `num_beams=1` and `do_sample=False`.
+    - *multinomial sampling* by calling [`~generation_utils.GenerationMixin.sample`] if `num_beams=1` and `do_sample=True`.
+    - *beam-search decoding* by calling [`~generation_utils.GenerationMixin.beam_search`] if `num_beams>1` and `do_sample=False`.
+    - *beam-search multinomial sampling* by calling [`~generation_utils.GenerationMixin.beam_sample`] if `num_beams>1` and `do_sample=True`.
+    - *diverse beam-search decoding* by calling [`~generation_utils.GenerationMixin.group_beam_search`], if `num_beams>1` and `num_beam_groups>1`.
     """
 
     def _prepare_model_inputs(
@@ -847,18 +854,23 @@ def generate(
         **model_kwargs,
     ) -> Union[GreedySearchOutput, SampleOutput, BeamSearchOutput, BeamSampleOutput, torch.LongTensor]:
         r"""
-        Generates sequences for models with a language modeling head. The method currently supports greedy decoding,
-        multinomial sampling, beam-search decoding, and beam-search multinomial sampling.
-        Apart from `inputs`, all the arguments below will default to the value of the attribute of the same name inside
-        the [`PretrainedConfig`] of the model. The default values indicated are the default values of those config.
+        Generates sequences of token ids for models with a language modeling head. The method supports the following
+        generation methods for text-decoder, text-to-text, speech-to-text, and vision-to-text models:
 
-        Most of these parameters are explained in more detail in [this blog
-        post](https://huggingface.co/blog/how-to-generate).
+        - *greedy decoding* by calling [`~generation_utils.GenerationMixin.greedy_search`] if `num_beams=1` and `do_sample=False`.
+        - *multinomial sampling* by calling [`~generation_utils.GenerationMixin.sample`] if `num_beams=1` and `do_sample=True`.
+        - *beam-search decoding* by calling [`~generation_utils.GenerationMixin.beam_search`] if `num_beams>1` and `do_sample=False`.
+        - *beam-search multinomial sampling* by calling [`~generation_utils.GenerationMixin.beam_sample`] if `num_beams>1` and `do_sample=True`.
+        - *diverse beam-search decoding* by calling [`~generation_utils.GenerationMixin.group_beam_search`], if `num_beams>1` and `num_beam_groups>1`.
+ + Apart from `inputs`, all the arguments below will default to the value of the attribute of the same name + as defined in the model's config (`config.json`) which in turn defaults to the [`~modeling_utils.PretrainedConfig`] of the model. + + Most of these parameters are explained in more detail in [this blog post](https://huggingface.co/blog/how-to-generate). Parameters: - inputs (`torch.Tensor` of shape `(batch_size, sequence_length)`, `(batch_size, sequence_length, - feature_dim)` or `(batch_size, num_channels, height, width)`, *optional*): + inputs (`torch.Tensor` of varying shape depending on the modality, *optional*): The sequence used as a prompt for the generation or as model inputs to the encoder. If `None` the method initializes it with `bos_token_id` and a batch size of 1. For decoder-only models `inputs` should of in the format of `input_ids`. For encoder-decoder models *inputs* can represent any of @@ -997,66 +1009,55 @@ def generate( Examples: + Greedy Decoding: + ```python - >>> from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM + >>> from transformers import AutoTokenizer, AutoModelForCausalLM, - >>> tokenizer = AutoTokenizer.from_pretrained("distilgpt2") - >>> model = AutoModelForCausalLM.from_pretrained("distilgpt2") - >>> # do greedy decoding without providing a prompt - >>> outputs = model.generate(max_length=40) - >>> print("Generated:", tokenizer.decode(outputs[0], skip_special_tokens=True)) + >>> tokenizer = AutoTokenizer.from_pretrained("gpt2") + >>> model = AutoModelForCausalLM.from_pretrained("gpt2") - >>> tokenizer = AutoTokenizer.from_pretrained("t5-base") - >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base") - >>> document = ( - ... "at least two people were killed in a suspected bomb attack on a passenger bus " - ... "in the strife-torn southern philippines on monday , the military said." - ... ) - >>> # encode input context - >>> input_ids = tokenizer(document, return_tensors="pt").input_ids - >>> # generate 3 independent sequences using beam search decoding (5 beams) - >>> # with T5 encoder-decoder model conditioned on short news article. 
- >>> outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) + >>> prompt = "Today I believe we can finally" + >>> input_ids = tokenizer(prompt, return_tensors="pt").input_ids - >>> tokenizer = AutoTokenizer.from_pretrained("distilgpt2") - >>> model = AutoModelForCausalLM.from_pretrained("distilgpt2") - >>> input_context = "The dog" - >>> # encode input context - >>> input_ids = tokenizer(input_context, return_tensors="pt").input_ids - >>> # generate 3 candidates using sampling - >>> outputs = model.generate(input_ids=input_ids, max_length=20, num_return_sequences=3, do_sample=True) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) + >>> # generate up to 30 tokens + >>> outputs = model.generate(input_ids, do_sample=False, max_length=30) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)) - >>> tokenizer = AutoTokenizer.from_pretrained("ctrl") - >>> model = AutoModelForCausalLM.from_pretrained("ctrl") - >>> # "Legal" is one of the control codes for ctrl - >>> input_context = "Legal My neighbor is" - >>> # encode input context - >>> input_ids = tokenizer(input_context, return_tensors="pt").input_ids - >>> outputs = model.generate(input_ids=input_ids, max_length=20, repetition_penalty=1.2) - >>> print("Generated:", tokenizer.decode(outputs[0], skip_special_tokens=True)) + ``` + + Multinomial Sampling: + + ```python + >>> from transformers import AutoTokenizer, AutoModelForCausalLM, + >>> import torch - >>> tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=False) + >>> tokenizer = AutoTokenizer.from_pretrained("gpt2") >>> model = AutoModelForCausalLM.from_pretrained("gpt2") - >>> input_context = "My cute dog" - >>> # get tokens of words that should not be generated - >>> bad_words_ids = tokenizer( - ... ["idiot", "stupid", "shut up"], add_prefix_space=True, add_special_tokens=False - >>> ).input_ids - >>> # get tokens of words that we want generated - >>> force_words_ids = tokenizer(["runs", "loves"], add_prefix_space=True, add_special_tokens=False).input_ids - >>> # encode input context - >>> input_ids = tokenizer(input_context, return_tensors="pt").input_ids - >>> # generate sequences without allowing bad_words to be generated - >>> outputs = model.generate( - ... input_ids=input_ids, - ... max_length=20, - ... do_sample=True, - ... bad_words_ids=bad_words_ids, - ... force_words_ids=force_words_ids, - ... ) - >>> print("Generated:", tokenizer.decode(outputs[0], skip_special_tokens=True)) + + >>> prompt = "Today I believe we can finally" + >>> input_ids = tokenizer(prompt, return_tensors="pt").input_ids + + >>> torch.manual_seed(0) + >>> outputs = model.generate(input_ids) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)) + + ``` + + Beam-search decoding: + + ```python + >>> from transformers import AutoTokenizer, AutoModelForSeq2SeqLM + + >>> tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de") + >>> model = AutoModelForCausalLM.from_pretrained("Helsinki-NLP/opus-mt-en-de") + + >>> sentence = "Paris is one of the densest populated areas in Europe." + >>> input_ids = tokenizer(sentence, return_tensors="pt").input_ids + + >>> outputs = model.generate(input_ids) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)) + ```""" # 1. 
Set generation parameters if not already defined
        bos_token_id = bos_token_id if bos_token_id is not None else self.config.bos_token_id

From 675ad48461a2cf0102acc97e38084d5edf234058 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Wed, 9 Mar 2022 15:20:41 +0100
Subject: [PATCH 06/12] correct tests

---
 docs/source/main_classes/text_generation.mdx | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/source/main_classes/text_generation.mdx b/docs/source/main_classes/text_generation.mdx
index ae5061003f87..509dfe750ad8 100644
--- a/docs/source/main_classes/text_generation.mdx
+++ b/docs/source/main_classes/text_generation.mdx
@@ -21,6 +21,12 @@ methods for auto-regressive text generation to every model class.
 
 [[autodoc]] generation_utils.GenerationMixin
     - generate
+    - greedy_search
+    - sample
+    - beam_search
+    - beam_sample
+    - group_beam_search
+    - constrained_beam_search
 
 ## TFGenerationMixin

From a468c1be67d61049b0a2d61c09e70e0e5e04acad Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Wed, 9 Mar 2022 15:38:10 +0100
Subject: [PATCH 07/12] more fixes

---
 src/transformers/generation_utils.py | 37 ++++++++++++++--------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py
index 0a3093f41bac..39558995d490 100644
--- a/src/transformers/generation_utils.py
+++ b/src/transformers/generation_utils.py
@@ -1012,7 +1012,7 @@ def generate(
         Greedy Decoding:
 
         ```python
-        >>> from transformers import AutoTokenizer, AutoModelForCausalLM,
+        >>> from transformers import AutoTokenizer, AutoModelForCausalLM
 
         >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
         >>> model = AutoModelForCausalLM.from_pretrained("gpt2")
@@ -1022,14 +1022,14 @@
 
         >>> # generate up to 30 tokens
         >>> outputs = model.generate(input_ids, do_sample=False, max_length=30)
-        >>> tokenizer.batch_decode(outputs, skip_special_tokens=True))
+        >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
 
         ```
 
         Multinomial Sampling:
 
         ```python
-        >>> from transformers import AutoTokenizer, AutoModelForCausalLM,
+        >>> from transformers import AutoTokenizer, AutoModelForCausalLM
         >>> import torch
 
         >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
         >>> model = AutoModelForCausalLM.from_pretrained("gpt2")
@@ -1038,9 +1038,9 @@
         >>> prompt = "Today I believe we can finally"
         >>> input_ids = tokenizer(prompt, return_tensors="pt").input_ids
 
-        >>> torch.manual_seed(0)
+        >>> torch.manual_seed(0)  # doctest: +IGNORE_RESULT
         >>> outputs = model.generate(input_ids)
-        >>> tokenizer.batch_decode(outputs, skip_special_tokens=True))
+        >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
 
         ```
 
         Beam-search decoding:
 
         ```python
         >>> from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
         >>> tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-de")
-        >>> model = AutoModelForCausalLM.from_pretrained("Helsinki-NLP/opus-mt-en-de")
+        >>> model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-de")
 
         >>> sentence = "Paris is one of the densest populated areas in Europe."
         >>> input_ids = tokenizer(sentence, return_tensors="pt").input_ids
 
         >>> outputs = model.generate(input_ids)
-        >>> tokenizer.batch_decode(outputs, skip_special_tokens=True))
+        >>> tokenizer.batch_decode(outputs, skip_special_tokens=True)
 
         ```"""
         # 1. Set generation parameters if not already defined
@@ -1458,7 +1458,7 @@ def greedy_search(
         **model_kwargs,
     ) -> Union[GreedySearchOutput, torch.LongTensor]:
         r"""
-        Generates sequences for models with a language modeling head using greedy decoding.
+ Generates sequences of token ids for models with a language modeling head using **greedy decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -1529,7 +1529,7 @@ def greedy_search( >>> outputs = model.greedy_search(input_ids, logits_processor=logits_processor) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) ```""" # init values logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() @@ -1684,7 +1684,7 @@ def sample( **model_kwargs, ) -> Union[SampleOutput, torch.LongTensor]: r""" - Generates sequences for models with a language modeling head using multinomial sampling. + Generates sequences of token ids for models with a language modeling head using **multinomial sampling** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -1767,7 +1767,7 @@ def sample( >>> outputs = model.sample(input_ids, logits_processor=logits_processor, logits_warper=logits_warper) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) ```""" # init values @@ -1927,7 +1927,7 @@ def beam_search( **model_kwargs, ) -> Union[BeamSearchOutput, torch.LongTensor]: r""" - Generates sequences for models with a language modeling head using beam search decoding. + Generates sequences of token ids for models with a language modeling head using **beam search decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -2238,7 +2238,7 @@ def beam_sample( **model_kwargs, ) -> Union[BeamSampleOutput, torch.LongTensor]: r""" - Generates sequences for models with a language modeling head using beam search with multinomial sampling. + Generates sequences of token ids for models with a language modeling head using **beam search multinomial sampling** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -2344,7 +2344,8 @@ def beam_sample( ... input_ids, beam_scorer, logits_processor=logits_processor, logits_warper=logits_warper, **model_kwargs ... ) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) + ['Wie alt bist du?'] ```""" # init values logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() @@ -2557,7 +2558,7 @@ def group_beam_search( **model_kwargs, ): r""" - Generates sequences for models with a language modeling head using beam search decoding. + Generates sequences of token ids for models with a language modeling head using **diverse beam search decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -2921,7 +2922,7 @@ def constrained_beam_search( ) -> Union[BeamSearchOutput, torch.LongTensor]: r""" - Generates sequences for models with a language modeling head using beam search decoding. + Generates sequences of token ids for models with a language modeling head using **constrained beam search decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): @@ -3025,8 +3026,8 @@ def constrained_beam_search( ... 
input_ids, beam_scorer, constraints=constraints, logits_processor=logits_processor, **model_kwargs ... ) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) - # => ['Wie alter sind Sie?'] + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) + ['Wie alt sind Sie?'] ```""" # init values logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() From 966df0793ab5b13fec44e748ba04ff02138e22f8 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Mar 2022 16:00:24 +0100 Subject: [PATCH 08/12] finish generate --- src/transformers/generation_flax_utils.py | 19 +++-- src/transformers/generation_utils.py | 99 ++++++++++++++++------- 2 files changed, 82 insertions(+), 36 deletions(-) diff --git a/src/transformers/generation_flax_utils.py b/src/transformers/generation_flax_utils.py index a9f76d738e96..a83c1fd2d5a3 100644 --- a/src/transformers/generation_flax_utils.py +++ b/src/transformers/generation_flax_utils.py @@ -176,12 +176,19 @@ def generate( **model_kwargs, ): r""" - Generates sequences for models with a language modeling head. The method currently supports greedy decoding, - and, multinomial sampling. + Generates sequences of token ids for models with a language modeling head. The method supports the following + generation methods for text-decoder, text-to-text, speech-to-text, and vision-to-text models: - Apart from `input_ids`, all the arguments below will default to the value of the attribute of the same name - inside the [`PretrainedConfig`] of the model. The default values indicated are the default values of those - config. + - *greedy decoding* by calling [`~generation_flax_utils.FlaxGenerationMixin._greedy_search`] if + `num_beams=1` and `do_sample=False`. + - *multinomial sampling* by calling [`~generation_flax_utils.FlaxGenerationMixin._sample`] if `num_beams=1` + and `do_sample=True`. + - *beam-search decoding* by calling [`~generation_utils.FlaxGenerationMixin._beam_search`] if `num_beams>1` + and `do_sample=False`. + + Apart from `inputs`, all the arguments below will default to the value of the attribute of the same name as + defined in the model's config (`config.json`) which in turn defaults to the + [`~modeling_utils.PretrainedConfig`] of the model. Most of these parameters are explained in more detail in [this blog post](https://huggingface.co/blog/how-to-generate). @@ -236,7 +243,7 @@ def generate( >>> input_ids = tokenizer(input_context, return_tensors="np").input_ids >>> # generate candidates using sampling >>> outputs = model.generate(input_ids=input_ids, max_length=20, top_k=30, do_sample=True) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) ```""" # set init values max_length = max_length if max_length is not None else self.config.max_length diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py index 39558995d490..143abeb8ca2d 100644 --- a/src/transformers/generation_utils.py +++ b/src/transformers/generation_utils.py @@ -380,11 +380,16 @@ class GenerationMixin: A class containing all functions for auto-regressive text generation, to be used as a mixin in [`PreTrainedModel`]. The class exposes the [`~generation_utils.GenerationMixin.generate`], which can be used for: - - *greedy decoding* by calling [`~generation_utils.GenerationMixin.greedy_search`] if `num_beams=1` and `do_sample=False`. 
- - *multinomial sampling* by calling [`~generation_utils.GenerationMixin.sample`] if `num_beams=1` and `do_sample=True`. - - *beam-search decoding* by calling [`~generation_utils.GenerationMixin.beam_search`] if `num_beams>1` and `do_sample=False`. - - *beam-search multinomial sampling* by calling [`~generation_utils.GenerationMixin.beam_sample`] if `num_beams>1` and `do_sample=True`. - - *diverse beam-search decoding* by calling [`~generation_utils.GenerationMixin.group_beam_search`], if `num_beams>1` and `num_beam_groups>1`. + - *greedy decoding* by calling [`~generation_utils.GenerationMixin.greedy_search`] if `num_beams=1` and + `do_sample=False`. + - *multinomial sampling* by calling [`~generation_utils.GenerationMixin.sample`] if `num_beams=1` and + `do_sample=True`. + - *beam-search decoding* by calling [`~generation_utils.GenerationMixin.beam_search`] if `num_beams>1` and + `do_sample=False`. + - *beam-search multinomial sampling* by calling [`~generation_utils.GenerationMixin.beam_sample`] if + `num_beams>1` and `do_sample=True`. + - *diverse beam-search decoding* by calling [`~generation_utils.GenerationMixin.group_beam_search`], if + `num_beams>1` and `num_beam_groups>1`. """ def _prepare_model_inputs( @@ -858,16 +863,23 @@ def generate( Generates sequences of token ids for models with a language modeling head. The method supports the following generation methods for text-decoder, text-to-text, speech-to-text, and vision-to-text models: - - *greedy decoding* by calling [`~generation_utils.GenerationMixin.greedy_search`] if `num_beams=1` and `do_sample=False`. - - *multinomial sampling* by calling [`~generation_utils.GenerationMixin.sample`] if `num_beams=1` and `do_sample=True`. - - *beam-search decoding* by calling [`~generation_utils.GenerationMixin.beam_search`] if `num_beams>1` and `do_sample=False`. - - *beam-search multinomial sampling* by calling [`~generation_utils.GenerationMixin.beam_sample`] if `num_beams>1` and `do_sample=True`. - - *diverse beam-search decoding* by calling [`~generation_utils.GenerationMixin.group_beam_search`], if `num_beams>1` and `num_beam_groups>1`. + - *greedy decoding* by calling [`~generation_utils.GenerationMixin.greedy_search`] if `num_beams=1` and + `do_sample=False`. + - *multinomial sampling* by calling [`~generation_utils.GenerationMixin.sample`] if `num_beams=1` and + `do_sample=True`. + - *beam-search decoding* by calling [`~generation_utils.GenerationMixin.beam_search`] if `num_beams>1` and + `do_sample=False`. + - *beam-search multinomial sampling* by calling [`~generation_utils.GenerationMixin.beam_sample`] if + `num_beams>1` and `do_sample=True`. + - *diverse beam-search decoding* by calling [`~generation_utils.GenerationMixin.group_beam_search`], if + `num_beams>1` and `num_beam_groups>1`. - Apart from `inputs`, all the arguments below will default to the value of the attribute of the same name - as defined in the model's config (`config.json`) which in turn defaults to the [`~modeling_utils.PretrainedConfig`] of the model. + Apart from `inputs`, all the arguments below will default to the value of the attribute of the same name as + defined in the model's config (`config.json`) which in turn defaults to the + [`~modeling_utils.PretrainedConfig`] of the model. - Most of these parameters are explained in more detail in [this blog post](https://huggingface.co/blog/how-to-generate). + Most of these parameters are explained in more detail in [this blog + post](https://huggingface.co/blog/how-to-generate). 
Parameters: inputs (`torch.Tensor` of varying shape depending on the modality, *optional*): @@ -1023,7 +1035,7 @@ def generate( >>> # generate up to 30 tokens >>> outputs = model.generate(input_ids, do_sample=False, max_length=30) >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) - + ['Today I believe we can finally get to the point where we can make a difference in the lives of the people of the United States of America.\n'] ``` Multinomial Sampling: @@ -1038,10 +1050,11 @@ def generate( >>> prompt = "Today I believe we can finally" >>> input_ids = tokenizer(prompt, return_tensors="pt").input_ids + >>> # sample up to 30 tokens >>> torch.manual_seed(0) # doctest: +IGNORE_RESULT - >>> outputs = model.generate(input_ids) + >>> outputs = model.generate(input_ids, do_sample=True, max_length=30) >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) - + ['Today I believe we can finally get rid of discrimination," said Rep. Mark Pocan (D-Wis.).\n\n"Just look at the'] ``` Beam-search decoding: @@ -1057,7 +1070,7 @@ def generate( >>> outputs = model.generate(input_ids) >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) - + ['Paris ist eines der dichtesten besiedelten Gebiete Europas.'] ```""" # 1. Set generation parameters if not already defined bos_token_id = bos_token_id if bos_token_id is not None else self.config.bos_token_id @@ -1458,7 +1471,8 @@ def greedy_search( **model_kwargs, ) -> Union[GreedySearchOutput, torch.LongTensor]: r""" - Generates sequences of token ids for models with a language modeling head using **greedy decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. + Generates sequences of token ids for models with a language modeling head using **greedy decoding** and can be + used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -1509,6 +1523,8 @@ def greedy_search( ... AutoModelForCausalLM, ... LogitsProcessorList, ... MinLengthLogitsProcessor, + ... StoppingCriteriaList, + ... MaxLengthCriteria, ... ) >>> tokenizer = AutoTokenizer.from_pretrained("gpt2") @@ -1517,26 +1533,30 @@ def greedy_search( >>> # set pad_token_id to eos_token_id because GPT2 does not have a EOS token >>> model.config.pad_token_id = model.config.eos_token_id - >>> input_prompt = "Today is a beautiful day, and" + >>> input_prompt = "It might be possible to" >>> input_ids = tokenizer(input_prompt, return_tensors="pt").input_ids >>> # instantiate logits processors >>> logits_processor = LogitsProcessorList( ... [ - ... MinLengthLogitsProcessor(15, eos_token_id=model.config.eos_token_id), + ... MinLengthLogitsProcessor(10, eos_token_id=model.config.eos_token_id), ... ] ... ) + >>> stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=20)]) - >>> outputs = model.greedy_search(input_ids, logits_processor=logits_processor) + >>> outputs = model.greedy_search( + ... input_ids, logits_processor=logits_processor, stopping_criteria=stopping_criteria + ... 
) >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) + ["It might be possible to get a better understanding of the nature of the problem, but it's not"] ```""" # init values logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() if max_length is not None: warnings.warn( - "`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.", + "`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=max_length)])` instead.", UserWarning, ) stopping_criteria = validate_stopping_criteria(stopping_criteria, max_length) @@ -1684,7 +1704,8 @@ def sample( **model_kwargs, ) -> Union[SampleOutput, torch.LongTensor]: r""" - Generates sequences of token ids for models with a language modeling head using **multinomial sampling** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. + Generates sequences of token ids for models with a language modeling head using **multinomial sampling** and + can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -1740,7 +1761,10 @@ def sample( ... MinLengthLogitsProcessor, ... TopKLogitsWarper, ... TemperatureLogitsWarper, + ... StoppingCriteriaList, + ... MaxLengthCriteria, ... ) + >>> import torch >>> tokenizer = AutoTokenizer.from_pretrained("gpt2") >>> model = AutoModelForCausalLM.from_pretrained("gpt2") @@ -1765,9 +1789,18 @@ def sample( ... ] ... ) - >>> outputs = model.sample(input_ids, logits_processor=logits_processor, logits_warper=logits_warper) + >>> stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=20)]) + + >>> torch.manual_seed(0) # doctest: +IGNORE_RESULT + >>> outputs = model.sample( + ... input_ids, + ... logits_processor=logits_processor, + ... logits_warper=logits_warper, + ... stopping_criteria=stopping_criteria, + ... ) >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) + ['Today is a beautiful day, and a wonderful day.\n\nI was lucky enough to meet the'] ```""" # init values @@ -1927,7 +1960,8 @@ def beam_search( **model_kwargs, ) -> Union[BeamSearchOutput, torch.LongTensor]: r""" - Generates sequences of token ids for models with a language modeling head using **beam search decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. + Generates sequences of token ids for models with a language modeling head using **beam search decoding** and + can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -2021,7 +2055,8 @@ def beam_search( >>> outputs = model.beam_search(input_ids, beam_scorer, logits_processor=logits_processor, **model_kwargs) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) + ['Wie alt bist du?'] ```""" # init values logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() @@ -2238,7 +2273,8 @@ def beam_sample( **model_kwargs, ) -> Union[BeamSampleOutput, torch.LongTensor]: r""" - Generates sequences of token ids for models with a language modeling head using **beam search multinomial sampling** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. 
+ Generates sequences of token ids for models with a language modeling head using **beam search multinomial + sampling** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -2558,7 +2594,8 @@ def group_beam_search( **model_kwargs, ): r""" - Generates sequences of token ids for models with a language modeling head using **diverse beam search decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. + Generates sequences of token ids for models with a language modeling head using **diverse beam search + decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: @@ -2658,7 +2695,8 @@ def group_beam_search( ... input_ids, beam_scorer, logits_processor=logits_processor, **model_kwargs ... ) - >>> print("Generated:", tokenizer.batch_decode(outputs, skip_special_tokens=True)) + >>> tokenizer.batch_decode(outputs, skip_special_tokens=True) + ['Wie alt bist du?'] ```""" # init values logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() @@ -2922,7 +2960,8 @@ def constrained_beam_search( ) -> Union[BeamSearchOutput, torch.LongTensor]: r""" - Generates sequences of token ids for models with a language modeling head using **constrained beam search decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. + Generates sequences of token ids for models with a language modeling head using **constrained beam search + decoding** and can be used for text-decoder, text-to-text, speech-to-text, and vision-to-text models. Parameters: input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): From edbae8eb045ea7879fc0742cbe601f8cdee0d171 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Mar 2022 16:00:43 +0100 Subject: [PATCH 09/12] add to doc stest --- utils/documentation_tests.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index 43135e225c8a..d70f7ab6729e 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -20,5 +20,6 @@ src/transformers/models/poolformer/modeling_poolformer.py src/transformers/models/vit_mae/modeling_vit_mae.py src/transformers/models/segformer/modeling_segformer.py src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +src/transformers/generation_utils.py docs/source/quicktour.mdx -docs/source/task_summary.mdx \ No newline at end of file +docs/source/task_summary.mdx From 06a72d93f7f4b30b38c4f67fa06511bccd5bf3f3 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Mar 2022 16:06:32 +0100 Subject: [PATCH 10/12] finish --- src/transformers/generation_flax_utils.py | 11 ++++++++++- src/transformers/generation_utils.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/transformers/generation_flax_utils.py b/src/transformers/generation_flax_utils.py index a83c1fd2d5a3..8967b876faaf 100644 --- a/src/transformers/generation_flax_utils.py +++ b/src/transformers/generation_flax_utils.py @@ -118,7 +118,16 @@ class BeamSearchState: class FlaxGenerationMixin: """ - A class containing all of the functions supporting generation, to be used as a mixin in [`FlaxPreTrainedModel`]. + A class containing all functions for auto-regressive text generation, to be used as a mixin in + [`FlaxPreTrainedModel`]. 
+ + The class exposes [`~generation_flax_utils.FlaxGenerationMixin.generate`], which can be used for: + - *greedy decoding* by calling [`~generation_flax_utils.FlaxGenerationMixin._greedy_search`] if + `num_beams=1` and `do_sample=False`. + - *multinomial sampling* by calling [`~generation_flax_utils.FlaxGenerationMixin._sample`] if `num_beams=1` + and `do_sample=True`. + - *beam-search decoding* by calling [`~generation_utils.FlaxGenerationMixin._beam_search`] if `num_beams>1` + and `do_sample=False`. """ @staticmethod diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py index 143abeb8ca2d..80ca838ce243 100644 --- a/src/transformers/generation_utils.py +++ b/src/transformers/generation_utils.py @@ -379,7 +379,7 @@ class GenerationMixin: """ A class containing all functions for auto-regressive text generation, to be used as a mixin in [`PreTrainedModel`]. - The class exposes the [`~generation_utils.GenerationMixin.generate`], which can be used for: + The class exposes [`~generation_utils.GenerationMixin.generate`], which can be used for: - *greedy decoding* by calling [`~generation_utils.GenerationMixin.greedy_search`] if `num_beams=1` and `do_sample=False`. - *multinomial sampling* by calling [`~generation_utils.GenerationMixin.sample`] if `num_beams=1` and From d77eaadb4ed6d7bd19bee96a73ac7b7b1a289ad5 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Wed, 9 Mar 2022 16:32:04 +0100 Subject: [PATCH 11/12] finalize --- src/transformers/generation_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py index 80ca838ce243..45006e76ce55 100644 --- a/src/transformers/generation_utils.py +++ b/src/transformers/generation_utils.py @@ -390,6 +390,8 @@ class GenerationMixin: `num_beams>1` and `do_sample=True`. - *diverse beam-search decoding* by calling [`~generation_utils.GenerationMixin.group_beam_search`], if `num_beams>1` and `num_beam_groups>1`. + - *constrained beam-search decoding* by calling [`~generation_utils.GenerationMixin.constrained_beam_search`], + if `constraints!=None` or `force_words_ids!=None`. """ def _prepare_model_inputs( @@ -873,6 +875,9 @@ def generate( `num_beams>1` and `do_sample=True`. - *diverse beam-search decoding* by calling [`~generation_utils.GenerationMixin.group_beam_search`], if `num_beams>1` and `num_beam_groups>1`. + - *constrained beam-search decoding* by calling + [`~generation_utils.GenerationMixin.constrained_beam_search`], if `constraints!=None` or + `force_words_ids!=None`. Apart from `inputs`, all the arguments below will default to the value of the attribute of the same name as defined in the model's config (`config.json`) which in turn defaults to the From 91a2b46084ac2a3bd0a556a5f98523b922b0e0b6 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 10 Mar 2022 11:19:35 +0100 Subject: [PATCH 12/12] add warning to generate method --- src/transformers/generation_flax_utils.py | 4 ++++ src/transformers/generation_utils.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/transformers/generation_flax_utils.py b/src/transformers/generation_flax_utils.py index 8967b876faaf..2bc6db2f56dd 100644 --- a/src/transformers/generation_flax_utils.py +++ b/src/transformers/generation_flax_utils.py @@ -195,10 +195,14 @@ def generate( - *beam-search decoding* by calling [`~generation_utils.FlaxGenerationMixin._beam_search`] if `num_beams>1` and `do_sample=False`. 
 
+        <Tip warning={true}>
+
         Apart from `inputs`, all the arguments below will default to the value of the attribute of the same name as
         defined in the model's config (`config.json`) which in turn defaults to the
         [`~modeling_utils.PretrainedConfig`] of the model.
+
+        </Tip>
 
         Most of these parameters are explained in more detail in [this blog
         post](https://huggingface.co/blog/how-to-generate).
diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py
index 45006e76ce55..85bbc51e6f23 100644
--- a/src/transformers/generation_utils.py
+++ b/src/transformers/generation_utils.py
@@ -879,10 +879,14 @@ def generate(
             [`~generation_utils.GenerationMixin.constrained_beam_search`], if `constraints!=None` or
             `force_words_ids!=None`.
 
+        <Tip warning={true}>
+
         Apart from `inputs`, all the arguments below will default to the value of the attribute of the same name as
         defined in the model's config (`config.json`) which in turn defaults to the
         [`~modeling_utils.PretrainedConfig`] of the model.
+
+        </Tip>
 
         Most of these parameters are explained in more detail in [this blog
         post](https://huggingface.co/blog/how-to-generate).
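
---

After the final patch, `generate` routes to one of the specialized decoding methods purely based on the flag combination it receives. The following standalone sketch illustrates that routing end to end; it is not part of the patch series itself, and it assumes a `transformers` release that already contains these commits (v4.18 or later), `torch` installed, and access to the public `gpt2` checkpoint:

```python
# Standalone sketch of the dispatch rules documented in this series.
# Assumptions: transformers >= 4.18 (contains these commits), torch installed,
# and network access to download the public "gpt2" checkpoint.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
input_ids = tokenizer("Today I believe we can finally", return_tensors="pt").input_ids

# num_beams=1, do_sample=False -> generate() dispatches to greedy_search()
greedy = model.generate(input_ids, max_length=30)

# num_beams=1, do_sample=True -> sample()
torch.manual_seed(0)
sampled = model.generate(input_ids, do_sample=True, max_length=30)

# num_beams>1, do_sample=False -> beam_search()
beam = model.generate(input_ids, num_beams=4, max_length=30)

# num_beams>1, num_beam_groups>1 -> group_beam_search()
diverse = model.generate(
    input_ids, num_beams=4, num_beam_groups=2, diversity_penalty=1.0, max_length=30
)

# force_words_ids != None -> constrained_beam_search() (documented in PATCH 11/12)
force_words_ids = tokenizer(["believe"], add_special_tokens=False).input_ids
constrained = model.generate(
    input_ids, num_beams=4, force_words_ids=force_words_ids, max_length=30
)

for out in (greedy, sampled, beam, diverse, constrained):
    print(tokenizer.batch_decode(out, skip_special_tokens=True)[0])
```

Every call goes through the same public `generate` entry point; only the flags differ, which is exactly the dispatch behavior the rewritten docstrings describe.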