diff --git a/README.md b/README.md
index e9b1a96f4a..28b3cb6e87 100644
--- a/README.md
+++ b/README.md
@@ -15,11 +15,11 @@ limitations under the License.
 -->
 
 <a href="https://github.com/huggingface/optimum-habana#gh-light-mode-only">
-  <img src="https://github.com/huggingface/optimum-habana/blob/main/readme_logo_light.png"/>
+  <img src="https://github.com/huggingface/optimum-habana/blob/v1.20-release/readme_logo_light.png"/>
 </a>
 
 <a href="https://github.com/huggingface/optimum-habana#gh-dark-mode-only">
-  <img src="https://github.com/huggingface/optimum-habana/blob/main/readme_logo_dark.png"/>
+  <img src="https://github.com/huggingface/optimum-habana/blob/v1.20-release/readme_logo_dark.png"/>
 </a>
 
 
@@ -123,8 +123,8 @@ The [GaudiTrainer](https://huggingface.co/docs/optimum/habana/package_reference/
 [Transformers Trainer](https://huggingface.co/docs/transformers/main_classes/trainer), and adapting a script using the Trainer to
 make it work with Intel Gaudi accelerators will mostly consist in simply swapping the `Trainer` class for the `GaudiTrainer` one.
 
-That's how most of the [example scripts](https://github.com/huggingface/optimum-habana/tree/main/examples) were adapted from their
-[original counterparts](https://github.com/huggingface/transformers/tree/main/examples/pytorch).
+That's how most of the [example scripts](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples) were adapted from their
+[original counterparts](https://github.com/huggingface/transformers/tree/main/examples/pytorch).
 
 Here is an example:
 ```diff
@@ -230,62 +230,62 @@ The following model architectures, tasks and device distributions have been vali
 
 | Architecture | Training | Inference | Tasks |
 |:-------------|:--------:|:---------:|:------|
-| BERT         | :heavy_check_mark: | :heavy_check_mark: | <li>[text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text feature extraction](https://github.com/huggingface/optimum-habana/tree/main/examples/text-feature-extraction)</li> |
-| RoBERTa | :heavy_check_mark: | :heavy_check_mark: | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li> |
-| ALBERT | :heavy_check_mark: | :heavy_check_mark: | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li> |
-| DistilBERT |:heavy_check_mark: | :heavy_check_mark: | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li> |
-| GPT2             | :heavy_check_mark: | :heavy_check_mark: | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| BLOOM(Z) |   | <li>DeepSpeed</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| StarCoder / StarCoder2 | :heavy_check_mark:  | <li>Single-card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| GPT-J | <li>DeepSpeed</li> | <li>Single card</li><li>DeepSpeed</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| GPT-Neo |      | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| GPT-NeoX | <li>DeepSpeed</li> | <li>DeepSpeed</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| OPT |   | <li>DeepSpeed</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Llama 2 / CodeLlama / Llama 3 / Llama Guard / Granite | :heavy_check_mark: | :heavy_check_mark: | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)</li> |
-| StableLM |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Falcon | <li>LoRA</li> | :heavy_check_mark: | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| CodeGen |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| MPT |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Mistral |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Phi | :heavy_check_mark:  | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Mixtral |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Persimmon |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Qwen2 / Qwen3 | <li>Single card</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Qwen2-MoE |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Gemma | :heavy_check_mark:  | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Gemma2 |  | :heavy_check_mark: | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| XGLM | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Cohere       |          | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| T5 / Flan T5 | :heavy_check_mark: | :heavy_check_mark: | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
-| BART |   | <li>Single card</li> | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
-| ViT | :heavy_check_mark: | :heavy_check_mark: | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
-| Swin | :heavy_check_mark: | :heavy_check_mark: | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
-| Wav2Vec2 | :heavy_check_mark: | :heavy_check_mark: | <li>[audio classification](https://github.com/huggingface/optimum-habana/tree/main/examples/audio-classification)</li><li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)</li> |
-| Whisper | :heavy_check_mark: | :heavy_check_mark: | <li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)</li> |
-| SpeechT5 |   | <li>Single card</li> | <li>[text to speech](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-speech)</li> |
-| CLIP | :heavy_check_mark: | :heavy_check_mark: | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)</li> |
-| BridgeTower | :heavy_check_mark: | :heavy_check_mark: | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)</li> |
-| ESMFold |   | <li>Single card</li> | <li>[protein folding](https://github.com/huggingface/optimum-habana/tree/main/examples/protein-folding)</li> |
-| Blip |   | <li>Single card</li> | <li>[visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)</li><li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| OWLViT |   | <li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
-| ClipSeg |   | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
-| Llava / Llava-next / Llava-onevision |    | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| idefics2 | <li>LoRA</li> | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| Paligemma | | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| Segment Anything Model |   | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
-| VideoMAE | | <li>Single card</li> | <li>[Video classification](https://github.com/huggingface/optimum-habana/tree/main/examples/video-classification)</li> |
-| TableTransformer |   | <li>Single card</li> | <li>[table object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/table-detection) </li> |
-| DETR |   | <li>Single card</li> | <li>[object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/object-detection)</li> |
-| Mllama     | <li>LoRA</li> | :heavy_check_mark: | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| MiniCPM3 |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Baichuan2 | <li>DeepSpeed</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| DeepSeek-V2 | :heavy_check_mark: | :heavy_check_mark: | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| DeepSeek-V3 / Moonlight |   | :heavy_check_mark: | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| ChatGLM | <li>DeepSpeed</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Qwen2-VL |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| VideoLLaVA | | <div style="text-align:left"><li>Single card</li></div> | <li>[Video comprehension](https://github.com/huggingface/optimum-habana/tree/main/examples/video-comprehension)</li> |
-| GLM-4V | |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| Arctic |          |  <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+| BERT         | :heavy_check_mark: | :heavy_check_mark: | <li>[text classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-classification)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text feature extraction](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-feature-extraction)</li> |
+| RoBERTa | :heavy_check_mark: | :heavy_check_mark: | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li> |
+| ALBERT | :heavy_check_mark: | :heavy_check_mark: | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li> |
+| DistilBERT |:heavy_check_mark: | :heavy_check_mark: | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li> |
+| GPT2             | :heavy_check_mark: | :heavy_check_mark: | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| BLOOM(Z) |   | <li>DeepSpeed</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| StarCoder / StarCoder2 | :heavy_check_mark:  | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| GPT-J | <li>DeepSpeed</li> | <li>Single card</li><li>DeepSpeed</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| GPT-Neo |      | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| GPT-NeoX | <li>DeepSpeed</li> | <li>DeepSpeed</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| OPT |   | <li>DeepSpeed</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Llama 2 / CodeLlama / Llama 3 / Llama Guard / Granite | :heavy_check_mark: | :heavy_check_mark: | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[text classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-classification) (Llama Guard)</li> |
+| StableLM |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Falcon | <li>LoRA</li> | :heavy_check_mark: | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| CodeGen |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| MPT |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Mistral |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Phi | :heavy_check_mark:  | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Mixtral |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Persimmon |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Qwen2 / Qwen3 | <li>Single card</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Qwen2-MoE |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Gemma | :heavy_check_mark:  | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Gemma2 |  | :heavy_check_mark: | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| XGLM | | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Cohere       |          | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| T5 / Flan T5 | :heavy_check_mark: | :heavy_check_mark: | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
+| BART |   | <li>Single card</li> | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
+| ViT | :heavy_check_mark: | :heavy_check_mark: | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-classification)</li> |
+| Swin | :heavy_check_mark: | :heavy_check_mark: | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-classification)</li> |
+| Wav2Vec2 | :heavy_check_mark: | :heavy_check_mark: | <li>[audio classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/audio-classification)</li><li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/speech-recognition)</li> |
+| Whisper | :heavy_check_mark: | :heavy_check_mark: | <li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/speech-recognition)</li> |
+| SpeechT5 |   | <li>Single card</li> | <li>[text to speech](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-to-speech)</li> |
+| CLIP | :heavy_check_mark: | :heavy_check_mark: | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/contrastive-image-text)</li> |
+| BridgeTower | :heavy_check_mark: | :heavy_check_mark: | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/contrastive-image-text)</li> |
+| ESMFold |   | <li>Single card</li> | <li>[protein folding](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/protein-folding)</li> |
+| Blip |   | <li>Single card</li> | <li>[visual question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/visual-question-answering)</li><li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| OWLViT |   | <li>Single card</li> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/zero-shot-object-detection)</li> |
+| ClipSeg |   | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/object-segementation)</li> |
+| Llava / Llava-next / Llava-onevision |    | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| idefics2 | <li>LoRA</li> | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| Paligemma | | <li>Single card</li> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| Segment Anything Model |   | <li>Single card</li> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/object-segementation)</li> |
+| VideoMAE | | <li>Single card</li> | <li>[Video classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/video-classification)</li> |
+| TableTransformer |   | <li>Single card</li> | <li>[table object detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/table-detection) </li> |
+| DETR |   | <li>Single card</li> | <li>[object detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/object-detection)</li> |
+| Mllama     | <li>LoRA</li> | :heavy_check_mark: | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| MiniCPM3 |   | <li>Single card</li> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Baichuan2 | <li>DeepSpeed</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| DeepSeek-V2 | :heavy_check_mark: | :heavy_check_mark: | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| DeepSeek-V3 / Moonlight |   | :heavy_check_mark: | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| ChatGLM | <li>DeepSpeed</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Qwen2-VL |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| VideoLLaVA | | <div style="text-align:left"><li>Single card</li></div> | <li>[Video comprehension](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/video-comprehension)</li> |
+| GLM-4V | |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| Arctic |          |  <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
 
 </div>
 
@@ -294,33 +294,33 @@ The following model architectures, tasks and device distributions have been vali
 
 | Architecture        | Training | Inference | Tasks |
 |:--------------------|:--------:|:---------:|:------|
-| Stable Diffusion    | :heavy_check_mark: | :heavy_check_mark: | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#text-to-image-generation)</li><li>[image-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#image-to-image-generation)</li> |
-| Stable Diffusion XL | :heavy_check_mark: | :heavy_check_mark: | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#stable-diffusion-xl-sdxl)</li><li>[image-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#stable-diffusion-xl-refiner)</li> |
-| Stable Diffusion Depth2img |         | <li>Single card</li> | <li>[depth-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
-| Stable Diffusion 3  | :heavy_check_mark: | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#stable-diffusion-3-and-35-sd3)</li> |
-| LDM3D            |               | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#text-to-image-generation)</li> |
-| FLUX.1           | <li>LoRA</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#flux1)</li><li>[image-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#flux1-image-to-image)</li> |
-| Text to Video    |               | <li>Single card</li> | <li>[text-to-video generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#text-to-video-generation)</li> |
-| Image to Video   |               | <li>Single card</li> | <li>[image-to-video generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#image-to-video-generation)</li> |
-| i2vgen-xl   |               | <li>Single card</li> | <li>[image-to-video generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#I2vgen-xl)</li> |
+| Stable Diffusion    | :heavy_check_mark: | :heavy_check_mark: | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#text-to-image-generation)</li><li>[image-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#image-to-image-generation)</li> |
+| Stable Diffusion XL | :heavy_check_mark: | :heavy_check_mark: | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#stable-diffusion-xl-sdxl)</li><li>[image-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#stable-diffusion-xl-refiner)</li> |
+| Stable Diffusion Depth2img |         | <li>Single card</li> | <li>[depth-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion)</li> |
+| Stable Diffusion 3  | :heavy_check_mark: | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#stable-diffusion-3-and-35-sd3)</li> |
+| LDM3D            |               | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#text-to-image-generation)</li> |
+| FLUX.1           | <li>LoRA</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#flux1)</li><li>[image-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#flux1-image-to-image)</li> |
+| Text to Video    |               | <li>Single card</li> | <li>[text-to-video generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#text-to-video-generation)</li> |
+| Image to Video   |               | <li>Single card</li> | <li>[image-to-video generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#image-to-video-generation)</li> |
+| i2vgen-xl   |               | <li>Single card</li> | <li>[image-to-video generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#I2vgen-xl)</li> |
 
 ### PyTorch Image Models/TIMM:
 
 | Architecture        | Training | Inference | Tasks |
 |:--------------------|:--------:|:---------:|:------|
-| FastViT       |          | <li>Single card</li> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
+| FastViT       |          | <li>Single card</li> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-classification)</li> |
 
 ### TRL:
 
 | Architecture     | Training | Inference            | Tasks                                                                                          |
 |:-----------------|:--------:|:--------------------:|:-----------------------------------------------------------------------------------------------|
-| Llama 2          | :heavy_check_mark: |           | <li>[DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl#dpo-pipeline)</li>  |
-| Llama 2          | :heavy_check_mark: |           | <li>[PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl#ppo-pipeline)</li>  |
-| Stable Diffusion | :heavy_check_mark: |           | <li>[DDPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl#ddpo-pipeline)</li> |
+| Llama 2          | :heavy_check_mark: |           | <li>[DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/trl#dpo-pipeline)</li>  |
+| Llama 2          | :heavy_check_mark: |           | <li>[PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/trl#ppo-pipeline)</li>  |
+| Stable Diffusion | :heavy_check_mark: |           | <li>[DDPO Pipeline](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/trl#ddpo-pipeline)</li> |
 
 Other models and tasks supported by the Transformers and Diffusers libraries may also work. You can refer to this [section](https://github.com/huggingface/optimum-habana#how-to-use-it)
-for using them with Optimum for Intel Gaudi. In addition, [this page](https://github.com/huggingface/optimum-habana/tree/main/examples) explains how to modify any
-[example](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the Transformers library to make it work with Optimum for Intel Gaudi.
+for using them with Optimum for Intel Gaudi. In addition, [this page](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples) explains how to modify any
+[example](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the Transformers library to make it work with Optimum for Intel Gaudi.
 
 If you find any issues while using those, please open an issue or a pull request.
 
@@ -328,8 +328,8 @@ After training your model, feel free to submit it to the Intel [leaderboard](htt
 to evaluate, score, and rank open-source LLMs that have been pre-trained or fine-tuned on Intel Hardwares. Models submitted to the leaderboard will be evaluated on
 the Intel Developer Cloud. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from the Eleuther AI Language Model Evaluation Harness.
 
-The list of validated models through continuous integration tests is posted [here](https://github.com/huggingface/optimum-habana/tree/main/tests/Habana_Validated_Models.md)
+The list of validated models through continuous integration tests is posted [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/tests/Habana_Validated_Models.md)
 
 ## Development
 
-Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions.
+Check the [contributor guide](https://github.com/huggingface/optimum/blob/main/CONTRIBUTING.md) for instructions.
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
index 4aba07b70d..74c7ef4287 100644
--- a/docs/source/index.mdx
+++ b/docs/source/index.mdx
@@ -58,92 +58,92 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 
 | Architecture | Training | Inference | Tasks |
 |--------------|:--------:|:---------:|:------|
-| BERT         | ✅       | ✅        | <li>[text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text feature extraction](https://github.com/huggingface/optimum-habana/tree/main/examples/text-feature-extraction)</li> |
-| RoBERTa      | ✅       | ✅        | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li> |
-| ALBERT       | ✅       | ✅        | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li> |
-| DistilBERT   | ✅       | ✅        | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li> |
-| GPT2         | ✅       | ✅        | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| BLOOM(Z)     |          | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| StarCoder / StarCoder2 | ✅ | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| GPT-J        | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>Single card</li><li>DeepSpeed</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| GPT-Neo      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| GPT-NeoX     | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| OPT          |          | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Llama 2 / CodeLlama / Llama 3 / Llama Guard / Granite | ✅ | ✅ | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)</li><li>[text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification) (Llama Guard)</li> |
-| StableLM     |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Falcon       | <div style="text-align:left"><li>LoRA</li></div> | ✅ | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| CodeGen      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| MPT          |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Mistral      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Phi          | ✅       | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Mixtral      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Gemma        | ✅       | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Gemma2       |           | ✅        | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Qwen2 / Qwen3 | <div style="text-align:left"><li>Single card</li></div> | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Qwen2-MoE    |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Persimmon    |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| XGLM         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Cohere       |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| T5 / Flan T5 | ✅       | ✅        | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
-| BART         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
-| ViT          | ✅       | ✅        | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
-| Swin         | ✅       | ✅        | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
-| Wav2Vec2     | ✅       | ✅        | <li>[audio classification](https://github.com/huggingface/optimum-habana/tree/main/examples/audio-classification)</li><li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)</li> |
-| Whisper      | ✅       | ✅        | <li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)</li> |
-| SpeechT5     |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text to speech](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-speech)</li> |
-| CLIP         | ✅       | ✅        | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)</li> |
-| BridgeTower  | ✅       | ✅        | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)</li> |
-| ESMFold      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[protein folding](https://github.com/huggingface/optimum-habana/tree/main/examples/protein-folding)</li> |
-| Blip         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)</li><li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| OWLViT       |          | <div style="text-align:left"><li>Single card</li></div> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
-| ClipSeg      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
-| Llava / Llava-next / Llava-onevision |    | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| Paligemma |    | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| idefics2     | <div style="text-align:left"><li>LoRA</li></div> | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| SAM          |          | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
-| VideoMAE |          | <div style="text-align:left"><li>Single card</li></div> | <li>[Video classification](https://github.com/huggingface/optimum-habana/tree/main/examples/video-classification)</li> |
-| TableTransformer |       | <div style="text-align:left"><li>Single card</li></div> | <li>[table object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/table-detection)</li> |
-| DETR         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/object-detection)</li> |
-| Mllama     | <div style="text-align:left"><li>LoRA</li></div> |✅      | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| Video-LLaVA         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[video comprehension](https://github.com/huggingface/optimum-habana/tree/main/examples/video-comprehension)</li> |
-| MiniCPM3 |   | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Baichuan2 | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| DeepSeek-V2 | ✅ | ✅ | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| DeepSeek-V3 / Moonlight |   | ✅ | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| ChatGLM     | <div style="text-align:left"><li>DeepSpeed</li></div> |  <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
-| Qwen2-VL |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| GLM-4V |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
-| Arctic |          |  <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
+| BERT         | ✅       | ✅        | <li>[text classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-classification)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text feature extraction](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-feature-extraction)</li> |
+| RoBERTa      | ✅       | ✅        | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li> |
+| ALBERT       | ✅       | ✅        | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li> |
+| DistilBERT   | ✅       | ✅        | <li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li> |
+| GPT2         | ✅       | ✅        | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| BLOOM(Z)     |          | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| StarCoder / StarCoder2 | ✅ | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| GPT-J        | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>Single card</li><li>DeepSpeed</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| GPT-Neo      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| GPT-NeoX     | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| OPT          |          | <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Llama 2 / CodeLlama / Llama 3 / Llama Guard / Granite | ✅ | ✅ | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)</li><li>[text classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-classification) (Llama Guard)</li> |
+| StableLM     |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Falcon       | <div style="text-align:left"><li>LoRA</li></div> | ✅ | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| CodeGen      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| MPT          |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Mistral      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Phi          | ✅       | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Mixtral      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Gemma        | ✅       | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Gemma2       |           | ✅        | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Qwen2 / Qwen3 | <div style="text-align:left"><li>Single card</li></div> | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Qwen2-MoE    |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Persimmon    |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| XGLM         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Cohere       |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| T5 / Flan T5 | ✅       | ✅        | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
+| BART         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[summarization](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/summarization)</li><li>[translation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/translation)</li><li>[question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering#fine-tuning-t5-on-squad20)</li> |
+| ViT          | ✅       | ✅        | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-classification)</li> |
+| Swin         | ✅       | ✅        | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-classification)</li> |
+| Wav2Vec2     | ✅       | ✅        | <li>[audio classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/audio-classification)</li><li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/speech-recognition)</li> |
+| Whisper      | ✅       | ✅        | <li>[speech recognition](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/speech-recognition)</li> |
+| SpeechT5     |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text to speech](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-to-speech)</li> |
+| CLIP         | ✅       | ✅        | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/contrastive-image-text)</li> |
+| BridgeTower  | ✅       | ✅        | <li>[contrastive image-text training](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/contrastive-image-text)</li> |
+| ESMFold      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[protein folding](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/protein-folding)</li> |
+| Blip         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[visual question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/visual-question-answering)</li><li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| OWLViT       |          | <div style="text-align:left"><li>Single card</li></div> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/zero-shot-object-detection)</li> |
+| ClipSeg      |          | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/object-segementation)</li> |
+| Llava / Llava-next / Llava-onevision |    | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| Paligemma |    | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| idefics2     | <div style="text-align:left"><li>LoRA</li></div> | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| SAM          |          | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/object-segementation)</li> |
+| VideoMAE |          | <div style="text-align:left"><li>Single card</li></div> | <li>[Video classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/video-classification)</li> |
+| TableTransformer |       | <div style="text-align:left"><li>Single card</li></div> | <li>[table object detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/table-detection)</li> |
+| DETR         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[object detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/object-detection)</li> |
+| Mllama     | <div style="text-align:left"><li>LoRA</li></div> |✅      | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| Video-LLaVA         |          | <div style="text-align:left"><li>Single card</li></div> | <li>[video comprehension](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/video-comprehension)</li> |
+| MiniCPM3 |   | <div style="text-align:left"><li>Single card</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Baichuan2 | <div style="text-align:left"><li>DeepSpeed</li></div> | <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| DeepSeek-V2 | ✅ | ✅ | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| DeepSeek-V3 / Moonlight |   | ✅ | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| ChatGLM     | <div style="text-align:left"><li>DeepSpeed</li></div> |  <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
+| Qwen2-VL |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| GLM-4V |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)</li> |
+| Arctic |          |  <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)</li> |
 
 - Diffusers
 
 | Architecture        | Training | Inference | Tasks |
 |---------------------|:--------:|:---------:|:------|
-| Stable Diffusion    | ✅ | ✅ | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
-| Stable Diffusion XL | ✅ | ✅ | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
-| Stable Diffusion Depth2img | | <li>Single card</li> | <li>[depth-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
-| Stable Diffusion 3  | ✅ | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#stable-diffusion-3-and-35-sd3)</li> |
-| LDM3D               |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
-| FLUX.1              | <li>LoRA</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)</li> |
-| Text to Video       |          | <li>Single card</li> | <li>[text-to-video generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#text-to-video-generation)</li> |
-| Image to Video      |               | <li>Single card</li> | <li>[image-to-video generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#image-to-video-generation)</li> |
-| i2vgen-xl       |          | <li>Single card</li> | <li>[image-to-video generation](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion#I2vgen-xl)</li> |
+| Stable Diffusion    | ✅ | ✅ | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion)</li> |
+| Stable Diffusion XL | ✅ | ✅ | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion)</li> |
+| Stable Diffusion Depth2img | | <li>Single card</li> | <li>[depth-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion)</li> |
+| Stable Diffusion 3  | ✅ | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#stable-diffusion-3-and-35-sd3)</li> |
+| LDM3D               |          | <div style="text-align:left"><li>Single card</li></div> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion)</li> |
+| FLUX.1              | <li>LoRA</li> | <li>Single card</li> | <li>[text-to-image generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion)</li> |
+| Text to Video       |          | <li>Single card</li> | <li>[text-to-video generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#text-to-video-generation)</li> |
+| Image to Video      |               | <li>Single card</li> | <li>[image-to-video generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#image-to-video-generation)</li> |
+| i2vgen-xl       |          | <li>Single card</li> | <li>[image-to-video generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion#I2vgen-xl)</li> |
 
 - PyTorch Image Models/TIMM:
 
 | Architecture        | Training | Inference | Tasks |
 |---------------------|:--------:|:---------:|:------|
-| FastViT             |          | <div style="text-align:left"><li>Single card</li></div> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)</li> |
+| FastViT             |          | <div style="text-align:left"><li>Single card</li></div> | <li>[image classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-classification)</li> |
 
 - TRL:
 
 | Architecture     | Training | Inference            | Tasks |
 |------------------|:--------:|:--------------------:|:------|
-| Llama 2          | ✅       |           | <li>[DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
-| Llama 2          | ✅       |           | <li>[PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
-| Stable Diffusion | ✅       |           | <li>[DDPO Pipeline](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)</li> |
+| Llama 2          | ✅       |           | <li>[DPO Pipeline](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/trl)</li> |
+| Llama 2          | ✅       |           | <li>[PPO Pipeline](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/trl)</li> |
+| Stable Diffusion | ✅       |           | <li>[DDPO Pipeline](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/trl)</li> |
 
 
 Other models and tasks supported by the 🤗 Transformers and 🤗 Diffusers library may also work.
 You can refer to this [section](https://github.com/huggingface/optimum-habana#how-to-use-it) for using them with 🤗 Optimum for Intel Gaudi.
-In addition, [this page](https://github.com/huggingface/optimum-habana/tree/main/examples) explains how to modify any [example](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the 🤗 Transformers library to make it work with 🤗 Optimum for Intel Gaudi.
+In addition, [this page](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples) explains how to modify any [example](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the 🤗 Transformers library to make it work with 🤗 Optimum for Intel Gaudi.
diff --git a/docs/source/package_reference/trainer.mdx b/docs/source/package_reference/trainer.mdx
index 740038cee8..69cdc11797 100644
--- a/docs/source/package_reference/trainer.mdx
+++ b/docs/source/package_reference/trainer.mdx
@@ -16,7 +16,7 @@ limitations under the License.
 
 # GaudiTrainer
 
-The [`GaudiTrainer`](https://huggingface.co/docs/optimum/habana/package_reference/trainer#optimum.habana.GaudiTrainer) class provides an extended API for the feature-complete [Transformers Trainer](https://huggingface.co/docs/transformers/main_classes/trainer). It is used in all the [example scripts](https://github.com/huggingface/optimum-habana/tree/main/examples).
+The [`GaudiTrainer`](https://huggingface.co/docs/optimum/habana/package_reference/trainer#optimum.habana.GaudiTrainer) class provides an extended API for the feature-complete [Transformers Trainer](https://huggingface.co/docs/transformers/main_classes/trainer). It is used in all the [example scripts](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples).
 
 Before instantiating your [`GaudiTrainer`](https://huggingface.co/docs/optimum/habana/package_reference/trainer#optimum.habana.GaudiTrainer), create a [`GaudiTrainingArguments`] object to access all the points of customization during training.
 
diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index 1c3ec01d3a..91001247d8 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -73,9 +73,9 @@ git clone -b v1.19.0 https://github.com/huggingface/optimum-habana
 pip install ./optimum-habana
 ```
 
-All available examples are under [optimum-habana/examples](https://github.com/huggingface/optimum-habana/tree/main/examples).
+All available examples are under [optimum-habana/examples](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples).
 
-Here is [text-generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation) example,
+Here is [text-generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation) example,
 to run Llama-2 7B text generation example on Gaudi, complete the prerequisite setup:
 ```bash
 cd ~/optimum-habana/examples/text-generation
@@ -136,7 +136,7 @@ run_generation.py \
 🤗 Optimum for Intel Gaudi contains a number of examples demonstrating single and multi Gaudi device training/fine-tuning.
 
 For example, a number of language models can be trained with the scripts provided
-[language modeling examples section](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling).
+in the [language modeling examples section](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling).
 
 As an illustration, let us run GPT-2 single and multi card training examples on Gaudi.
 
@@ -240,7 +240,7 @@ outputs = pipeline(
 ```
 
 In addition, sample scripts for fine-tuning diffusion models are given in
-[Stable Diffusion training section](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion/training).
+the [Stable Diffusion training section](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion/training).
 
 A more comprehensive list of examples in Optimum for Intel Gaudi is given next.
 
@@ -253,37 +253,37 @@ to see more options for running inference.
 Here are examples for various modalities and tasks that can be used out of the box:
 
 - **Text**
-  - [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)
-  - [multi node training](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training)
-  - [protein folding](https://github.com/huggingface/optimum-habana/tree/main/examples/protein-folding)
-  - [question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/question-answering)
-  - [sentence transformers training](https://github.com/huggingface/optimum-habana/tree/main/examples/sentence-transformers-training)
-  - [summarization](https://github.com/huggingface/optimum-habana/tree/main/examples/summarization)
-  - [table detection](https://github.com/huggingface/optimum-habana/tree/main/examples/table-detection)
-  - [text classification](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification)
-  - [text feature extraction](https://github.com/huggingface/optimum-habana/tree/main/examples/text-feature-extraction)
-  - [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)
-  - [translation](https://github.com/huggingface/optimum-habana/tree/main/examples/translation)
-  - [trl](https://github.com/huggingface/optimum-habana/tree/main/examples/trl)
+  - [language modeling](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling)
+  - [multi node training](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/multi-node-training)
+  - [protein folding](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/protein-folding)
+  - [question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/question-answering)
+  - [sentence transformers training](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/sentence-transformers-training)
+  - [summarization](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/summarization)
+  - [table detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/table-detection)
+  - [text classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-classification)
+  - [text feature extraction](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-feature-extraction)
+  - [text generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation)
+  - [translation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/translation)
+  - [trl](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/trl)
 
 - **Audio**
-  - [audio classification](https://github.com/huggingface/optimum-habana/tree/main/examples/audio-classification)
-  - [speech recognition](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition)
-  - [text to speech](https://github.com/huggingface/optimum-habana/tree/main/examples/text-to-speech)
+  - [audio classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/audio-classification)
+  - [speech recognition](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/speech-recognition)
+  - [text to speech](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-to-speech)
 
 - **Images**
-  - [object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/object-detection)
-  - [object segementation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)
-  - [image classification](https://github.com/huggingface/optimum-habana/tree/main/examples/image-classification)
-  - [image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)
-  - [contrastive image text](https://github.com/huggingface/optimum-habana/tree/main/examples/contrastive-image-text)
-  - [stable diffusion](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
-  - [visual question answering](https://github.com/huggingface/optimum-habana/tree/main/examples/visual-question-answering)
-  - [zero-shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)
+  - [object detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/object-detection)
+  - [object segmentation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/object-segementation)
+  - [image classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-classification)
+  - [image to text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/image-to-text)
+  - [contrastive image text](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/contrastive-image-text)
+  - [stable diffusion](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion)
+  - [visual question answering](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/visual-question-answering)
+  - [zero-shot object detection](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/zero-shot-object-detection)
 
 - **Video**
-  - [stable-video-diffusion](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion)
-  - [video-classification](https://github.com/huggingface/optimum-habana/tree/main/examples/video-classification)
+  - [stable-video-diffusion](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion)
+  - [video-classification](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/video-classification)
 
 To learn more about how to adapt 🤗 Transformers or Diffusers scripts for Intel Gaudi, check out
 [Script Adaptation](https://huggingface.co/docs/optimum/habana/usage_guides/script_adaptation) guide.
diff --git a/docs/source/tutorials/distributed.mdx b/docs/source/tutorials/distributed.mdx
index f664ab4e14..c6624ae144 100644
--- a/docs/source/tutorials/distributed.mdx
+++ b/docs/source/tutorials/distributed.mdx
@@ -18,10 +18,10 @@ limitations under the License.
 
 As models get bigger, parallelism has emerged as a strategy for training larger models on limited hardware and accelerating training speed by several orders of magnitude.
 
-All the [PyTorch examples](https://github.com/huggingface/optimum-habana/tree/main/examples) and the `GaudiTrainer` script work out of the box with distributed training.
+All the [PyTorch examples](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples) and the `GaudiTrainer` script work out of the box with distributed training.
 There are two ways of launching them:
 
-1. Using the [gaudi_spawn.py](https://github.com/huggingface/optimum-habana/blob/main/examples/gaudi_spawn.py) script:
+1. Using the [gaudi_spawn.py](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/gaudi_spawn.py) script:
 
    - Use MPI for distributed training:
 
@@ -32,7 +32,7 @@ There are two ways of launching them:
      ```
 
      where `--argX` is an argument of the script to run in a distributed way.
-     Examples are given for question answering [here](https://github.com/huggingface/optimum-habana/blob/main/examples/question-answering/README.md#multi-card-training) and text classification [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification#multi-card-training).
+     Examples are given for question answering [here](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/question-answering/README.md#multi-card-training) and text classification [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-classification#multi-card-training).
 
    - Use DeepSpeed for distributed training:
 
@@ -43,7 +43,7 @@ There are two ways of launching them:
      ```
 
      where `--argX` is an argument of the script to run in a distributed way.
-     Examples are given for question answering [here](https://github.com/huggingface/optimum-habana/blob/main/examples/question-answering/README.md#using-deepspeed) and text classification [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-classification#using-deepspeed).
+     Examples are given for question answering [here](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/question-answering/README.md#using-deepspeed) and text classification [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-classification#using-deepspeed).
 
 2. Using the `DistributedRunner` directly in code:
 
diff --git a/docs/source/tutorials/inference.mdx b/docs/source/tutorials/inference.mdx
index 309fe54191..71023c88c4 100644
--- a/docs/source/tutorials/inference.mdx
+++ b/docs/source/tutorials/inference.mdx
@@ -22,7 +22,7 @@ An effective quick start would be to review the inference examples provided in t
 [here].
 
 You can also explore the 
-[examples in the Optimum for Intel Gaudi repository]((https://github.com/huggingface/optimum-habana/tree/main/examples)).
+[examples in the Optimum for Intel Gaudi repository](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples).
 While the examples folder includes both training and inference, the inference-specific content
 provides valuable guidance for optimizing and running workloads on Intel Gaudi accelerators.
 
@@ -64,7 +64,7 @@ The variable `my_args` should contain some inference-specific arguments, you can
 
 ## In our Examples
 
-All [our examples](https://github.com/huggingface/optimum-habana/tree/main/examples) contain instructions for running inference with a given model on a given dataset.
+All [our examples](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples) contain instructions for running inference with a given model on a given dataset.
 The reasoning is the same for every example: run the example script with `--do_eval` and `--per_device_eval_batch_size` and without `--do_train`.
 A simple template is the following:
 ```bash
diff --git a/docs/source/tutorials/stable_diffusion.mdx b/docs/source/tutorials/stable_diffusion.mdx
index 574b7bbc25..0c249dd045 100644
--- a/docs/source/tutorials/stable_diffusion.mdx
+++ b/docs/source/tutorials/stable_diffusion.mdx
@@ -60,7 +60,7 @@ Generated images can be returned as either PIL images or NumPy arrays, depending
 
 <Tip>
 
-Check out the [example](https://github.com/huggingface/optimum-habana/tree/main/examples/stable-diffusion) provided in the official Github repository.
+Check out the [example](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/stable-diffusion) provided in the official Github repository.
 
 </Tip>
 
@@ -179,4 +179,4 @@ pipeline = GaudiStableDiffusionPipeline.from_pretrained(
 
 [Textual Inversion](https://arxiv.org/abs/2208.01618) is a method to personalize text2image models like Stable Diffusion on your own images using just 3-5 examples.
 
-You can find [here](https://github.com/huggingface/optimum-habana/blob/main/examples/stable-diffusion/textual_inversion.py) an example script that implements this training method.
+You can find [here](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/stable-diffusion/textual_inversion.py) an example script that implements this training method.
diff --git a/docs/source/usage_guides/deepspeed.mdx b/docs/source/usage_guides/deepspeed.mdx
index 40cd670383..04c9085240 100644
--- a/docs/source/usage_guides/deepspeed.mdx
+++ b/docs/source/usage_guides/deepspeed.mdx
@@ -105,7 +105,7 @@ This argument both indicates that DeepSpeed should be used and points to your De
 
 Finally, there are two possible ways to launch your script:
 
-1. Using the [gaudi_spawn.py](https://github.com/huggingface/optimum-habana/blob/main/examples/gaudi_spawn.py) script:
+1. Using the [gaudi_spawn.py](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/gaudi_spawn.py) script:
 
 ```bash
 python gaudi_spawn.py \
diff --git a/docs/source/usage_guides/multi_node_training.mdx b/docs/source/usage_guides/multi_node_training.mdx
index 9b49ccda10..ebd342f520 100644
--- a/docs/source/usage_guides/multi_node_training.mdx
+++ b/docs/source/usage_guides/multi_node_training.mdx
@@ -46,7 +46,7 @@ Once your Intel Gaudi instances are ready, follow the steps for [setting up a mu
 
 Finally, there are two possible ways to run your training script on several nodes:
 
-1. With the [`gaudi_spawn.py`](https://github.com/huggingface/optimum-habana/blob/main/examples/gaudi_spawn.py) script, you can run the following command:
+1. With the [`gaudi_spawn.py`](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/gaudi_spawn.py) script, you can run the following command:
 ```bash
 python gaudi_spawn.py \
     --hostfile path_to_my_hostfile --use_deepspeed \
@@ -79,7 +79,7 @@ env_variable_2_name=value
 
 ## Recommendations
 
-- It is strongly recommended to use gradient checkpointing for multi-node runs to get the highest speedups. You can enable it with `--gradient_checkpointing` in [these examples](https://github.com/huggingface/optimum-habana/tree/main/examples) or with `gradient_checkpointing=True` in your `GaudiTrainingArguments`.
+- It is strongly recommended to use gradient checkpointing for multi-node runs to get the highest speedups. You can enable it with `--gradient_checkpointing` in [these examples](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples) or with `gradient_checkpointing=True` in your `GaudiTrainingArguments`.
 - Larger batch sizes should lead to higher speedups.
 - Multi-node inference is not recommended and can provide inconsistent results.
 - On Intel Tiber AI Cloud instances, run your Docker containers with the `--privileged` flag so that EFA devices are visible.
@@ -88,7 +88,7 @@ env_variable_2_name=value
 ## Example
 
 In this example, we fine-tune a pre-trained GPT2-XL model on the [WikiText dataset](https://huggingface.co/datasets/wikitext).
-We are going to use the [causal language modeling example which is given in the Github repository](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling#gpt-2gpt-and-causal-language-modeling).
+We are going to use the [causal language modeling example which is given in the Github repository](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/language-modeling#gpt-2gpt-and-causal-language-modeling).
 
 The first step consists in training the model on several nodes with this command:
 ```bash
diff --git a/docs/source/usage_guides/quantization.mdx b/docs/source/usage_guides/quantization.mdx
index a22cc1a9c8..65f766f165 100644
--- a/docs/source/usage_guides/quantization.mdx
+++ b/docs/source/usage_guides/quantization.mdx
@@ -17,7 +17,7 @@ limitations under the License.
 # Quantization
 
 Intel® Gaudi® offers several possibilities to make inference faster. For examples of FP8 and UINT4 for Inference, see the
-[text-generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation) example.
+[text-generation](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation) example.
 
 This guide provides the steps required to enable FP8 and UINT4 precision on your Intel® Gaudi® AI
 accelerator using the Intel® Neural Compressor (INC) package.
diff --git a/examples/README.md b/examples/README.md
index 9b4a65f31d..76c101a921 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -17,7 +17,7 @@ limitations under the License.
 
 This folder contains actively maintained examples of use of 🤗 Optimum Habana for various ML tasks.
 
-Other [examples](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the 🤗 Transformers library can be adapted the same way to enable deployment on Gaudi processors. This simply consists in:
+Other [examples](https://github.com/huggingface/transformers/tree/main/examples/pytorch) from the 🤗 Transformers library can be adapted the same way to enable deployment on Gaudi processors. This simply consists in:
 - replacing the `Trainer` from 🤗 Transformers with the `GaudiTrainer` from 🤗 Optimum Habana,
 - replacing the `TrainingArguments` from 🤗 Transformers with the `GaudiTrainingArguments` from 🤗 Optimum Habana.
 
@@ -70,7 +70,7 @@ ip_2 slots=8
 ip_n slots=8
 ```
 
-You can find more information about multi-node training in the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/multi_node_training) and in the [`multi-node-training`](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training) folder where a Dockerfile is provided to easily set up your environment.
+You can find more information about multi-node training in the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/multi_node_training) and in the [`multi-node-training`](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/multi-node-training) folder where a Dockerfile is provided to easily set up your environment.
 
 
 ## Loading from a Tensorflow/Flax checkpoint file instead of a PyTorch model
diff --git a/examples/contrastive-image-text/README.md b/examples/contrastive-image-text/README.md
index f69ac8c8f9..881757a33a 100644
--- a/examples/contrastive-image-text/README.md
+++ b/examples/contrastive-image-text/README.md
@@ -164,7 +164,7 @@ python3 ../gaudi_spawn.py --world_size 8 --use_mpi run_clip.py \
 
 ### DeepSpeed
 
-You can check the [DeepSpeed](https://github.com/huggingface/optimum-habana/tree/main/examples#deepspeed) section in Optimum Habana examples for how to run DeepSpeed.
+You can check the [DeepSpeed](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples#deepspeed) section in Optimum Habana examples for how to run DeepSpeed.
 You can also look at the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) for more information about how to use DeepSpeed in Optimum Habana.
 
 
diff --git a/examples/image-to-text/README.md b/examples/image-to-text/README.md
index 2cb8532c70..97139907a5 100644
--- a/examples/image-to-text/README.md
+++ b/examples/image-to-text/README.md
@@ -17,7 +17,7 @@ limitations under the License.
 # Image to Text Examples
 This directory contains a script that showcases how to perform image to text generation on Intel® Gaudi® AI Accelerators.
 
-Habana FusedSDPA is a fused and optimized implementation of torch.nn.functional.scaled_dot_product_attention() for Gaudi. For more details, refer to [Gaudi online documentation](https://docs.habana.ai/en/latest/PyTorch/Model_Optimization_PyTorch/Optimization_in_PyTorch_Models.html?highlight=fusedsdpa#using-fused-scaled-dot-product-attention-fusedsdpa). We optimized many models with FusedSDPA implementation as in [optimum/habana/transformers/models](https://github.com/huggingface/optimum-habana/tree/main/optimum/habana/transformers/models). If a model is not optimized with FusedSDPA, it uses [SDPA implementation](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html).
+Habana FusedSDPA is a fused and optimized implementation of torch.nn.functional.scaled_dot_product_attention() for Gaudi. For more details, refer to [Gaudi online documentation](https://docs.habana.ai/en/latest/PyTorch/Model_Optimization_PyTorch/Optimization_in_PyTorch_Models.html?highlight=fusedsdpa#using-fused-scaled-dot-product-attention-fusedsdpa). We optimized many models with FusedSDPA implementation as in [optimum/habana/transformers/models](https://github.com/huggingface/optimum-habana/tree/v1.20-release/optimum/habana/transformers/models). If a model is not optimized with FusedSDPA, it uses [SDPA implementation](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html).
 
 ## Inference with mixed-precision (BF16)
 
diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md
index 6db5d4f5ca..1895a81a60 100644
--- a/examples/language-modeling/README.md
+++ b/examples/language-modeling/README.md
@@ -128,7 +128,7 @@ PT_HPU_LAZY_MODE=1 python ../gaudi_spawn.py \
     --deepspeed path_for_deepspeed_config
 ```
 
-This example has been validated with the following DeepSpeed ZeRO-2 config: https://github.com/huggingface/optimum-habana/blob/main/tests/configs/deepspeed_zero_2.json
+This example has been validated with the following DeepSpeed ZeRO-2 config: https://github.com/huggingface/optimum-habana/blob/v1.20-release/tests/configs/deepspeed_zero_2.json
 
 
 ## Multi-Node Training with Deepspeed (GPT-NeoX)
@@ -140,7 +140,7 @@ It reaches a perplexity of 10.469.
 > [!NOTE]
 >  For GPT-NeoX-20B model, please switch to jemalloc in case of host OOM issues using ``` export LD_PRELOAD=<path>/libjemalloc.so.2 ```
 
-> Please refer to [this page](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training) for performing multi-node training properly.
+> Please refer to [this page](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/multi-node-training) for performing multi-node training properly.
 
 ```bash
 PT_HPU_LAZY_MODE=1 python ../gaudi_spawn.py \
@@ -162,7 +162,7 @@ PT_HPU_LAZY_MODE=1 python ../gaudi_spawn.py \
     --deepspeed path_for_deepspeed_config
 ```
 
-This example has been validated with the following DeepSpeed ZeRO-2 config: https://github.com/huggingface/optimum-habana/blob/main/tests/configs/deepspeed_zero_2.json
+This example has been validated with the following DeepSpeed ZeRO-2 config: https://github.com/huggingface/optimum-habana/blob/v1.20-release/tests/configs/deepspeed_zero_2.json
 
 
 ## RoBERTa/BERT/DistilBERT and masked language modeling
diff --git a/examples/multi-node-training/README.md b/examples/multi-node-training/README.md
index bc0ba8fda0..a6e98f1404 100644
--- a/examples/multi-node-training/README.md
+++ b/examples/multi-node-training/README.md
@@ -30,8 +30,8 @@ where `--argX` is an argument of the script to run.
 Check out the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/multi_node_training) to know how to set up your Gaudi instances for multi-node runs on premises or on AWS.
 
 We provide two `Dockerfile` to easily start your multi-node runs:
-- A `Dockerfile` provided [here](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training/EFA/Dockerfile) for multi-node runs on AWS.
-- A `Dockerfile` provided [here](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training/GaudiNIC/Dockerfile) for multi-node runs using GaudiNIC.
+- A `Dockerfile` provided [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/multi-node-training/EFA/Dockerfile) for multi-node runs on AWS.
+- A `Dockerfile` provided [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/multi-node-training/GaudiNIC/Dockerfile) for multi-node runs using GaudiNIC.
 
 
 The Dockerfile is based on an image compatible with Ubuntu 22.04 but you can easily adapt it to another OS.
@@ -99,7 +99,7 @@ ip_2 slots=8
 ip_n slots=8
 ```
 
-You can find a template [here](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training/hostfile).
+You can find a template [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/multi-node-training/hostfile).
 
 
 ## Environment variables
@@ -111,11 +111,11 @@ env_variable_2_name=value
 ...
 ```
 
-You can find an example for GaudiNIC instances [here](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training/GaudiNIC/.deepspeed_env).
+You can find an example for GaudiNIC instances [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/multi-node-training/GaudiNIC/.deepspeed_env).
 
 > Note above environment variables refers to /etc/profile.d/habanalabs.sh inside docker, and should set only on GaudiNIC master node.
 
-You can find an example for AWS instances [here](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training/EFA/.deepspeed_env).
+You can find an example for AWS instances [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/multi-node-training/EFA/.deepspeed_env).
 
 > Note that one should set `HCCL_OVER_OFI=1` and `LD_LIBRARY_PATH=/root/hccl_ofi_wrapper:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib` only on AWS DL1 instances. *These should not be used otherwise*.
 
diff --git a/examples/sentence-transformers-training/README.md b/examples/sentence-transformers-training/README.md
index 8936d91abf..a8265d3bbe 100644
--- a/examples/sentence-transformers-training/README.md
+++ b/examples/sentence-transformers-training/README.md
@@ -2,11 +2,11 @@
 
 We provide 3 examples to show how to use Sentence Transformers with HPU devices.
 
-- **[training_stsbenchmark.py](https://github.com/huggingface/optimum-habana/tree/main/examples/sentence-transformers-training/sts)** - This example shows how to create a Sentence Transformers model from scratch by using a pre-trained transformer model (e.g. [`distilbert-base-uncased`](https://huggingface.co/distilbert/distilbert-base-uncased)) together with a pooling layer.
+- **[training_stsbenchmark.py](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/sentence-transformers-training/sts)** - This example shows how to create a Sentence Transformers model from scratch by using a pre-trained transformer model (e.g. [`distilbert-base-uncased`](https://huggingface.co/distilbert/distilbert-base-uncased)) together with a pooling layer.
 
-- **[training_nli.py](https://github.com/huggingface/optimum-habana/tree/main/examples/sentence-transformers-training/nli)** - This example provides two sentences (a premise and a hypothesis), and the task of Natural Language Inference (NLI) is to determine whether the premise entails the hypothesis, contradicts it, or if they are neutral. Commonly the NLI dataset in [SNLI](https://huggingface.co/datasets/stanfordnlp/snli) and [MultiNLI](https://huggingface.co/datasets/nyu-mll/multi_nli) are used.
+- **[training_nli.py](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/sentence-transformers-training/nli)** - This example provides two sentences (a premise and a hypothesis), and the task of Natural Language Inference (NLI) is to determine whether the premise entails the hypothesis, contradicts it, or if they are neutral. Commonly the NLI dataset in [SNLI](https://huggingface.co/datasets/stanfordnlp/snli) and [MultiNLI](https://huggingface.co/datasets/nyu-mll/multi_nli) are used.
 
-- **[training_paraphrases.py](https://github.com/huggingface/optimum-habana/tree/main/examples/sentence-transformers-training/paraphrases)** - This example loads various datasets from Sentence Transformers. We construct batches by sampling examples from the respective dataset.
+- **[training_paraphrases.py](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/sentence-transformers-training/paraphrases)** - This example loads various datasets from Sentence Transformers. We construct batches by sampling examples from the respective dataset.
 
 ### Tested Examples/Models and Configurations
 
diff --git a/examples/sentence-transformers-training/nli/README.md b/examples/sentence-transformers-training/nli/README.md
index d59fc7b03b..3b9d4e7d5e 100644
--- a/examples/sentence-transformers-training/nli/README.md
+++ b/examples/sentence-transformers-training/nli/README.md
@@ -50,7 +50,7 @@ If you want to save the checkpoints for the model you need using `--saving_model
 
 ## Multi-card Training
 
-For multi-card training you can use the script of [gaudi_spawn.py](https://github.com/huggingface/optimum-habana/blob/main/examples/gaudi_spawn.py) to execute. There are two options to run the multi-card training by using '--use_deepspeed' or '--use_mpi'. We take the option of '--use_deepspeed' for our example of  multi-card training.
+For multi-card training you can use the script of [gaudi_spawn.py](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/gaudi_spawn.py) to execute. There are two options to run the multi-card training by using '--use_deepspeed' or '--use_mpi'. We take the option of '--use_deepspeed' for our example of  multi-card training.
 
 ```bash
 HABANA_VISIBLE_MODULES="2,3" PT_HPU_LAZY_MODE=1 python ../../gaudi_spawn.py --use_deepspeed --world_size 2 training_nli.py bert-base-uncased
diff --git a/examples/sentence-transformers-training/sts/README.md b/examples/sentence-transformers-training/sts/README.md
index 7c6e27536c..1838219802 100644
--- a/examples/sentence-transformers-training/sts/README.md
+++ b/examples/sentence-transformers-training/sts/README.md
@@ -37,7 +37,7 @@ If you want to save the checkpoints for training model you need using `--saving_
 
 ## Multi-card Training
 
-For multi-card training you can use the script of [gaudi_spawn.py](https://github.com/huggingface/optimum-habana/blob/main/examples/gaudi_spawn.py) to execute. There are two options to run the multi-card training by using '--use_deepspeed' or '--use_mpi'. We take the option of '--use_deepspeed' for our example of  multi-card training.
+For multi-card training, you can use the [gaudi_spawn.py](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/gaudi_spawn.py) script. There are two ways to run multi-card training: with '--use_deepspeed' or with '--use_mpi'. The example below uses '--use_deepspeed'.
 
 ```bash
 HABANA_VISIBLE_MODULES="2,3" PT_HPU_LAZY_MODE=1 python ../../gaudi_spawn.py --use_deepspeed --world_size 2 training_stsbenchmark.py bert-base-uncased
diff --git a/examples/speech-recognition/README.md b/examples/speech-recognition/README.md
index 69625cc0ab..6453bd3779 100644
--- a/examples/speech-recognition/README.md
+++ b/examples/speech-recognition/README.md
@@ -42,11 +42,11 @@ pip install -r requirements.txt
 
 ## Connectionist Temporal Classification
 
-The script [`run_speech_recognition_ctc.py`](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition/run_speech_recognition_ctc.py) can be used to fine-tune any pretrained [Connectionist Temporal Classification Model](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForCTC) for automatic speech recognition on one of the [official speech recognition datasets](https://huggingface.co/datasets?task_ids=task_ids:automatic-speech-recognition) or a custom dataset.
+The script [`run_speech_recognition_ctc.py`](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/speech-recognition/run_speech_recognition_ctc.py) can be used to fine-tune any pretrained [Connectionist Temporal Classification Model](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForCTC) for automatic speech recognition on one of the [official speech recognition datasets](https://huggingface.co/datasets?task_ids=task_ids:automatic-speech-recognition) or a custom dataset.
 
 Speech recognition models that have been pretrained in an unsupervised fashion on audio data alone, *e.g.* [Wav2Vec2](https://huggingface.co/transformers/main/model_doc/wav2vec2.html), have shown to require only very little annotated data to yield good performance on automatic speech recognition datasets.
 
-In the script [`run_speech_recognition_ctc`](https://github.com/huggingface/optimum-habana/tree/main/examples/speech-recognition/run_speech_recognition_ctc.py), we first create a vocabulary from all unique characters of both the training data and evaluation data. Then, we preprocess the speech recognition dataset, which includes correct resampling, normalization and padding. Finally, the pretrained speech recognition model is fine-tuned on the annotated speech recognition datasets using CTC loss.
+In the script [`run_speech_recognition_ctc.py`](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/speech-recognition/run_speech_recognition_ctc.py), we first create a vocabulary from all unique characters of both the training data and evaluation data. Then, we preprocess the speech recognition dataset, which includes correct resampling, normalization and padding. Finally, the pretrained speech recognition model is fine-tuned on the annotated speech recognition datasets using CTC loss.
 
 <!-- ---
 **NOTE**
diff --git a/examples/stable-diffusion/training/train_dreambooth_lora_flux.py b/examples/stable-diffusion/training/train_dreambooth_lora_flux.py
index 8581dd9b14..5a5f21afc6 100755
--- a/examples/stable-diffusion/training/train_dreambooth_lora_flux.py
+++ b/examples/stable-diffusion/training/train_dreambooth_lora_flux.py
@@ -105,7 +105,7 @@ def save_model_card(
 These are {repo_id} DreamBooth LoRA weights for {base_model}.
 
 The weights were trained using [DreamBooth](https://dreambooth.github.io/) with the
-[Gaudi Flux diffusers trainer](https://github.com/huggingface/optimum-habana/blob/main/examples/stable-diffusion/training/README.md).
+[Gaudi Flux diffusers trainer](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/stable-diffusion/training/README.md).
 
 Was LoRA for the text encoder enabled? False.
 
diff --git a/examples/summarization/README.md b/examples/summarization/README.md
index 75e509ab51..6ecdd28550 100644
--- a/examples/summarization/README.md
+++ b/examples/summarization/README.md
@@ -179,7 +179,7 @@ PT_HPU_LAZY_MODE=1 python ../gaudi_spawn.py \
 
 ## Using DeepSpeed
 
-You can check the [DeepSpeed](https://github.com/huggingface/optimum-habana/tree/main/examples#deepspeed) section in Optimum Habana examples for how to run DeepSpeed.
+You can check the [DeepSpeed](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples#deepspeed) section in Optimum Habana examples for how to run DeepSpeed.
 You also can look at the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) for more information about how to use DeepSpeed in Optimum Habana.
 
 
diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md
index 8f60425907..16b87476d4 100755
--- a/examples/text-generation/README.md
+++ b/examples/text-generation/README.md
@@ -161,7 +161,7 @@ python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py \
 --flash_attention_causal_mask
 ```
 
-To run Deepseek-R1-BF16 inference on 16 Gaudi3 cards (2 nodes) use the following command. Ensure you replace the hostfile parameter with the appropriate file. Sample hostfile reference [here](https://github.com/huggingface/optimum-habana/blob/main/examples/multi-node-training/hostfile)
+To run Deepseek-R1-BF16 inference on 16 Gaudi3 cards (2 nodes), use the following command. Ensure you replace the hostfile parameter with the appropriate file. A sample hostfile is available [here](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/multi-node-training/hostfile).
 
 > NOTE: This is an experimental support currently. Due to memory constraints, BS=1 is only supported for now.
 
@@ -835,4 +835,4 @@ PT_HPU_LAZY_MODE=1 python run_lm_eval.py \
 
 ## Text-Generation Pipeline
 
-A Transformers-like pipeline is defined and provided [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation/text-generation-pipeline). It is optimized for Gaudi and can be called to generate text in your scripts.
\ No newline at end of file
+A Transformers-like pipeline is defined and provided [here](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation/text-generation-pipeline). It is optimized for Gaudi and can be called to generate text in your scripts.
\ No newline at end of file
diff --git a/examples/trl/README.md b/examples/trl/README.md
index b3cd08b70b..126e08931b 100644
--- a/examples/trl/README.md
+++ b/examples/trl/README.md
@@ -210,7 +210,7 @@ which will also push the model to your HuggingFace hub account.
 
 ### Running the model
 
-We can load the DPO-trained LoRA adaptors which were saved by the DPO training step and run it through the [text-generation example](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation).
+We can load the DPO-trained LoRA adaptors that were saved by the DPO training step and run them through the [text-generation example](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation).
 
 ```bash
 PT_HPU_LAZY_MODE=1 PT_ENABLE_INT64_SUPPORT=1 python ../gaudi_spawn.py --world_size 8 --use_deepspeed run_generation.py \
@@ -297,7 +297,7 @@ There are three main steps to the PPO training process:
     ```
 
 ### Running the model
-We can load the PPO-trained LoRA adaptors which were saved by the PPO training step and run it through the [text-generation example](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation).
+We can load the PPO-trained LoRA adaptors that were saved by the PPO training step and run them through the [text-generation example](https://github.com/huggingface/optimum-habana/tree/v1.20-release/examples/text-generation).
 
 ```bash
 PT_HPU_LAZY_MODE=1 PT_ENABLE_INT64_SUPPORT=1 python run_generation.py \
diff --git a/notebooks/AI_HW_Summit_2022.ipynb b/notebooks/AI_HW_Summit_2022.ipynb
index 883c3cff75..9721c3a7d8 100644
--- a/notebooks/AI_HW_Summit_2022.ipynb
+++ b/notebooks/AI_HW_Summit_2022.ipynb
@@ -41,7 +41,7 @@
    "source": [
     "## Training Script\n",
     "\n",
-    "We are going to use the `run_clm.py` example script that you can find [here](https://github.com/huggingface/optimum-habana/blob/main/examples/language-modeling/run_clm.py). It performs the following:\n",
+    "We are going to use the `run_clm.py` example script that you can find [here](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/language-modeling/run_clm.py). It performs the following:\n",
     "- download and preprocess the dataset,\n",
     "- instantiate the model by downloading a pre-trained checkpoint or initializing a new one,\n",
     "- download a tokenizer,\n",
@@ -198,7 +198,7 @@
    "source": [
     "### Running the Script\n",
     "\n",
-    "We are going to leverage the `DistributedRunner` class to launch a distributed training. This could also be done with the [`gaudi_spawn.py`](https://github.com/huggingface/optimum-habana/blob/main/examples/gaudi_spawn.py) script. More information [here](https://huggingface.co/docs/optimum/habana/usage_guides/distributed).\n",
+    "We are going to leverage the `DistributedRunner` class to launch a distributed training run. This could also be done with the [`gaudi_spawn.py`](https://github.com/huggingface/optimum-habana/blob/v1.20-release/examples/gaudi_spawn.py) script. More information [here](https://huggingface.co/docs/optimum/habana/usage_guides/distributed).\n",
     "\n",
     "To be initialized, an instance of this class requires the command to execute and the number of devices to use. Since one Gaudi has 8 HPUs, we are going to use all of them.\n",
     "\n",
@@ -270,7 +270,7 @@
    "id": "08dcd80f",
    "metadata": {},
    "source": [
-    "We need a DeepSpeed configuration. We are going to use [this one](https://github.com/huggingface/optimum-habana/tree/main/notebooks/configs/deepspeed_zero_2.json)."
+    "We need a DeepSpeed configuration. We are going to use [this one](https://github.com/huggingface/optimum-habana/blob/v1.20-release/notebooks/configs/deepspeed_zero_2.json)."
    ]
   },
   {
diff --git a/optimum/habana/transformers/models/chatglm/modeling_chatglm.py b/optimum/habana/transformers/models/chatglm/modeling_chatglm.py
index 6f71fef42e..a40f8ba1c4 100644
--- a/optimum/habana/transformers/models/chatglm/modeling_chatglm.py
+++ b/optimum/habana/transformers/models/chatglm/modeling_chatglm.py
@@ -185,7 +185,7 @@ def gaudi_chatglm_repeat_kv(
     attention_mask: torch.Tensor,
 ):
     """
-    Refer https://github.com/huggingface/optimum-habana/blob/main/optimum/habana/transformers/models/llama/modeling_llama.py#L109
+    Refer https://github.com/huggingface/optimum-habana/blob/v1.20-release/optimum/habana/transformers/models/llama/modeling_llama.py#L109
     Copied from repeat_kv: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
     The only differences are:
         - Append num_key_value_heads == 1 check as kv states can be broadcasted during matmuls so need to expand and reshape them.
diff --git a/optimum/habana/transformers/models/glm4v/modeling_chatglm.py b/optimum/habana/transformers/models/glm4v/modeling_chatglm.py
index dcf1d91e27..e68c2c6d5f 100644
--- a/optimum/habana/transformers/models/glm4v/modeling_chatglm.py
+++ b/optimum/habana/transformers/models/glm4v/modeling_chatglm.py
@@ -283,7 +283,7 @@ def gaudi_chatglm_repeat_kv(
     attention_mask: torch.Tensor,
 ):
     """
-    Refer https://github.com/huggingface/optimum-habana/blob/main/optimum/habana/transformers/models/llama/modeling_llama.py#L109
+    Refer https://github.com/huggingface/optimum-habana/blob/v1.20-release/optimum/habana/transformers/models/llama/modeling_llama.py#L109
     Copied from repeat_kv: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
     The only differences are:
         - Append num_key_value_heads == 1 check as kv states can be broadcasted during matmuls so need to expand and reshape them.