diff --git a/.github/workflows/fast_tests.yml b/.github/workflows/fast_tests.yml
index b47decc095..7a0f7f0117 100644
--- a/.github/workflows/fast_tests.yml
+++ b/.github/workflows/fast_tests.yml
@@ -18,7 +18,7 @@ jobs:
       group: itac-bm-emr-gaudi3-dell-2gaudi
 
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -43,7 +43,7 @@ jobs:
       group: itac-bm-emr-gaudi3-dell-1gaudi
 
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
diff --git a/.github/workflows/slow_tests_gaudi2.yml b/.github/workflows/slow_tests_gaudi2.yml
index 268a4c0e28..aed176a08d 100644
--- a/.github/workflows/slow_tests_gaudi2.yml
+++ b/.github/workflows/slow_tests_gaudi2.yml
@@ -17,7 +17,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -30,7 +30,7 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             /bin/bash tests/ci/example_diff_tests.sh
   stable-diffusion:
     name: Test Stable Diffusion
@@ -43,7 +43,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -59,7 +59,7 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             /bin/bash tests/ci/slow_tests_diffusers.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
   deepspeed:
     name: Test DeepSpeed models
@@ -72,7 +72,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -88,7 +88,7 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             /bin/bash tests/ci/slow_tests_deepspeed.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
   fsdp:
     name: Test FSDP models
@@ -101,7 +101,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -117,7 +117,7 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             make slow_tests_fsdp TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
   multi-card:
     name: Test multi-card models
@@ -130,7 +130,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -146,7 +146,7 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             /bin/bash tests/ci/slow_tests_8x.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
   single-card:
     name: Test single-card models
@@ -160,7 +160,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -177,7 +177,7 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             /bin/bash tests/ci/slow_tests_1x.sh
   text-generation:
     name: Test text-generation example
@@ -192,7 +192,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -208,7 +208,7 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
   trl:
     name: Test TRL integration
@@ -221,7 +221,7 @@ jobs:
         uses: actions/checkout@v2
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -237,7 +237,7 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             /bin/bash tests/ci/slow_tests_trl.sh
   sentence-transformers:
     name: Test Sentence Transformers integration
@@ -258,7 +258,7 @@ jobs:
           path: sentence-transformers
       - name: Pull image
         run: |
-            docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+            docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       - name: Run tests
         run: |
             docker run \
@@ -274,5 +274,5 @@ jobs:
             --cap-add=sys_nice \
             --net=host \
             --ipc=host \
-            vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+            vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
             /bin/bash optimum-habana/tests/ci/sentence_transformers.sh
diff --git a/.github/workflows/slow_tests_gaudi3.yml b/.github/workflows/slow_tests_gaudi3.yml
index 03960d70af..6f9b3e699f 100644
--- a/.github/workflows/slow_tests_gaudi3.yml
+++ b/.github/workflows/slow_tests_gaudi3.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-1gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -37,7 +37,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-8gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -60,7 +60,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-8gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -83,7 +83,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-8gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -106,7 +106,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-8gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -130,7 +130,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-1gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -155,7 +155,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-8gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -178,7 +178,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-1gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -201,7 +201,7 @@ jobs:
     runs-on:
       group: itac-bm-emr-gaudi3-dell-1gaudi
     container:
-      image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --workdir=/root/workspace --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none
diff --git a/Makefile b/Makefile
index 433ce83fec..e03219dbcb 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
 export PT_HPU_LAZY_MODE=1
 # will be removed when lazy is disabled
 
-.PHONY:	style test
+.PHONY:	style test install_deepspeed
 
 # Run code quality checks
 style_check: clean
@@ -96,13 +96,12 @@ slow_tests_1x: test_installs
 # Run multi-card non-regression tests
 slow_tests_8x: test_installs
 	@status1=0; status2=0; \
-	DATA_CACHE=$(DATA_CACHE) python -m pytest tests/test_examples.py -v -s -k "multi_card" || status1=$$?; \
+	DATASET_CONFIG='$(DATASET_CONFIG)' python -m pytest tests/test_examples.py -v -s -k "multi_card" || status1=$$?; \
 	python -m pytest tests/test_habana_profiler_integration.py -v -s -m x8 || status2=$$?; \
 	exit $$((status1 + status2))
 
 # Run DeepSpeed non-regression tests
-slow_tests_deepspeed: test_installs
-	python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+slow_tests_deepspeed: test_installs install_deepspeed
 	python -m pytest tests/test_examples.py -v -s -k "deepspeed"
 
 slow_tests_diffusers: test_installs
@@ -113,10 +112,9 @@ slow_tests_sentence_transformers: test_installs
 	python -m pytest tests/test_sentence_transformers.py -v -s
 
 # Run all text-generation non-regression tests
-slow_tests_text_generation_example: test_installs
+slow_tests_text_generation_example: test_installs install_deepspeed
 	python -m pip install -r examples/text-generation/requirements_awq.txt
 	BUILD_CUDA_EXT=0 python -m pip install -vvv --no-build-isolation git+https://github.com/HabanaAI/AutoGPTQ.git
-	python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
 	python -m pip install tiktoken blobfile
 	python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder.py -v -s --token $(TOKEN)
 
@@ -127,18 +125,15 @@ slow_tests_text_generation_example_1x: test_installs
 	python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder.py -m "(not x2) and (not x4) and (not x8)" -v -s --token $(TOKEN)
 
 # Run subset of text-generation non-regression tests that require 2 Gaudi cards
-slow_tests_text_generation_example_2x: test_installs
-	python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+slow_tests_text_generation_example_2x: test_installs install_deepspeed
 	python -m pytest tests/test_text_generation_example.py -m x2 -v -s --token $(TOKEN)
 
 # Run subset of text-generation non-regression tests that require 4 Gaudi cards
-slow_tests_text_generation_example_4x: test_installs
-	python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+slow_tests_text_generation_example_4x: test_installs install_deepspeed
 	python -m pytest tests/test_text_generation_example.py -m x4 -v -s --token $(TOKEN)
 
 # Run subset of text-generation non-regression tests that require 8 Gaudi cards
-slow_tests_text_generation_example_8x: test_installs
-	python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+slow_tests_text_generation_example_8x: test_installs install_deepspeed
 	python -m pytest tests/test_text_generation_example.py -m x8 -v -s --token $(TOKEN)
 
 # Run image-to-text non-regression tests
@@ -146,7 +141,10 @@ slow_tests_image_to_text_example: test_installs
 	python -m pytest tests/test_image_to_text_example.py -v -s --token $(TOKEN)
 
 slow_tests_image_to_text_example_1x: test_installs
-	python -m pytest tests/test_image_to_text_example.py -m "not x8" -v -s --token $(TOKEN)
+	python -m pytest tests/test_image_to_text_example.py -m "(not x2) and (not x8)" -v -s --token $(TOKEN)
+
+slow_tests_image_to_text_example_2x: test_installs
+	python -m pytest tests/test_image_to_text_example.py -m x2 -v -s --token $(TOKEN)
 
 slow_tests_image_to_text_example_8x: test_installs
 	python -m pytest tests/test_image_to_text_example.py -m x8 -v -s --token $(TOKEN)
@@ -225,3 +223,6 @@ clean:
 
 test_installs:
 	python -m pip install .[tests]
+
+install_deepspeed:  # Habana DeepSpeed fork; keep this tag in sync with the Gaudi software release used by CI
+	python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.22.0
diff --git a/README.md b/README.md
index 95c4f2d20d..e9b1a96f4a 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ Please refer to the Intel Gaudi AI Accelerator official [installation guide](htt
 > Tests should be run in a Docker container based on Intel Gaudi's official images. Instructions to
 > obtain the latest containers from the Intel Gaudi Vault are available
 > [here](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#use-intel-gaudi-containers).
-> The current Optimum for Intel Gaudi has been validated with Intel Gaudi v1.21 stack.
+> The current Optimum for Intel Gaudi has been validated with the Intel Gaudi v1.22 stack.
 
 
 ## Install the library and get example scripts
@@ -65,9 +65,9 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up
 To use the example associated with the latest stable release, run:
 ```bash
 git clone https://github.com/huggingface/optimum-habana
-cd optimum-habana && git checkout v1.18.0
+cd optimum-habana && git checkout v1.19.0
 ```
-with `v1.18.0` being the latest Optimum for Intel Gaudi release version.
+with `v1.19.0` being the latest Optimum for Intel Gaudi release version.
 
 ### Option 2: Use the latest main branch under development
 
@@ -284,7 +284,8 @@ The following model architectures, tasks and device distributions have been vali
 | ChatGLM | <li>DeepSpeed</li> | <li>Single card</li> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | Qwen2-VL |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
 | VideoLLaVA | | <div style="text-align:left"><li>Single card</li></div> | <li>[Video comprehension](https://github.com/huggingface/optimum-habana/tree/main/examples/video-comprehension)</li> |
-| GLM-4V | |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li>
+| GLM-4V | |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
+| Arctic |          |  <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 
 </div>
 
diff --git a/conftest.py b/conftest.py
index b05956913c..e6dc2b57ff 100644
--- a/conftest.py
+++ b/conftest.py
@@ -3,6 +3,7 @@
 import operator
 import os
 import sys
+import time
 from pathlib import Path
 
 import pytest
@@ -113,6 +114,14 @@ def token(request):
 
 
 def pytest_configure(config):
+    junitxml_path = config.getoption("junitxml", None)
+    junitxml_global_dir = os.getenv("JUNITXML_DIR", None)
+
+    if not junitxml_path and junitxml_global_dir:
+        timestamp = time.strftime("%Y%m%d%H%M%S")
+        os.makedirs(junitxml_global_dir, exist_ok=True)
+        config.option.xmlpath = os.path.join(junitxml_global_dir, f"result_{timestamp}.xml")
+
     # Bitsandbytes installation for {test_bnb_qlora.py test_bnb_inference.py} tests
     # This change will be reverted shortly
     bnb_tests = any("bnb" in name for name in config.known_args_namespace.file_or_dir)
@@ -126,7 +135,7 @@ def pytest_configure(config):
                 "-m",
                 "pip",
                 "install",
-                "git+https://github.com/bitsandbytes-foundation/bitsandbytes.git@multi-backend-refactor",
+                "git+https://github.com/bitsandbytes-foundation/bitsandbytes.git@main",
             ]
         )
     name = ""
diff --git a/docs/Dockerfile b/docs/Dockerfile
index 038c41100b..51c23063b7 100644
--- a/docs/Dockerfile
+++ b/docs/Dockerfile
@@ -1,4 +1,4 @@
-FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+FROM vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
 
 ARG commit_sha
 ARG clone_url
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
index 7c0246dc0f..4aba07b70d 100644
--- a/docs/source/index.mdx
+++ b/docs/source/index.mdx
@@ -113,6 +113,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 | ChatGLM     | <div style="text-align:left"><li>DeepSpeed</li></div> |  <div style="text-align:left"><li>Single card</li></div> | <li>[language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)</li><li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 | Qwen2-VL |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
 | GLM-4V |          |  <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
+| Arctic |          |  <div style="text-align:left"><li>DeepSpeed</li></div> | <li>[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation)</li> |
 
 - Diffusers
 
diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index d077f21939..1c3ec01d3a 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -32,10 +32,10 @@ platform for deep learning and follow the steps to start and connect to the node
 ## Docker Setup
 
 Now that you have access to the node, you will use the latest Intel Gaudi AI Accelerator docker image by executing the docker run command which will
-automatically download and run the docker. At the time of writing this guide, latest Gaudi docker version was 1.21.0:
+automatically download and run the docker. At the time of writing this guide, the latest Gaudi docker version was 1.22.0:
 
 ```bash
-release=1.21.0
+release=1.22.0
 os=ubuntu22.04
 torch=2.6.0
 docker_image=vault.habana.ai/gaudi-docker/$release/$os/habanalabs/pytorch-installer-$torch:latest
@@ -65,11 +65,11 @@ docker run -itd \
 ## Optimum for Intel Gaudi Setup
 
 Check latest release of Optimum for Intel Gaudi [here](https://github.com/huggingface/optimum-habana/releases).
-At the time of writing this guide, latest Optimum for Intel Gaudi release version was v1.18.0, which is paired with Intel Gaudi Software release
-version 1.21.0.  Install Optimum for Intel Gaudi as follows:
+At the time of writing this guide, the latest Optimum for Intel Gaudi release version was v1.19.0, which is paired with Intel Gaudi Software release
+version 1.22.0. Install Optimum for Intel Gaudi as follows:
 
 ```bash
-git clone -b v1.18.0 https://github.com/huggingface/optimum-habana
+git clone -b v1.19.0 https://github.com/huggingface/optimum-habana
 pip install ./optimum-habana
 ```
 
diff --git a/docs/source/usage_guides/deepspeed.mdx b/docs/source/usage_guides/deepspeed.mdx
index 098f1192b6..40cd670383 100644
--- a/docs/source/usage_guides/deepspeed.mdx
+++ b/docs/source/usage_guides/deepspeed.mdx
@@ -79,7 +79,7 @@ It is strongly advised to read [this section](https://huggingface.co/docs/transf
 
 </Tip>
 
-Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.21.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Intel.
+Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.22.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Intel.
 
 The [Transformers documentation](https://huggingface.co/docs/transformers/main_classes/deepspeed#configuration) explains how to write a configuration from scratch very well.
 A more complete description of all configuration possibilities is available [here](https://www.deepspeed.ai/docs/config-json/).
diff --git a/examples/audio-classification/README.md b/examples/audio-classification/README.md
index ffc38e6709..143701b087 100644
--- a/examples/audio-classification/README.md
+++ b/examples/audio-classification/README.md
@@ -27,9 +27,6 @@ First, you should install the requirements:
 pip install -r requirements.txt
 ```
 
-> [!NOTE]
-> Please add the flags ENABLE_LB_BUNDLE_ALL_COMPUTE_MME=0 and ENABLE_EXPERIMENTAL_FLAGS=1 for facebook/wav2vec2-base stability issues on gaudi3. Please note this is a workaround for release 1.20 only.
-
 ## Single-HPU
 
 The following command shows how to fine-tune [wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the 🗣️ [Keyword Spotting subset](https://huggingface.co/datasets/superb#ks) of the SUPERB dataset on a single HPU.
diff --git a/examples/audio-classification/requirements.txt b/examples/audio-classification/requirements.txt
index bae36f7451..9367c8e899 100644
--- a/examples/audio-classification/requirements.txt
+++ b/examples/audio-classification/requirements.txt
@@ -1,4 +1,5 @@
-datasets>=1.14.0
-evaluate
+datasets == 3.6.0
+evaluate == 0.4.3
 numba==0.60.0
-librosa
+librosa == 0.10.2.post1
+
diff --git a/examples/audio-classification/run_audio_classification.py b/examples/audio-classification/run_audio_classification.py
index 073b8ad577..801250e27d 100644
--- a/examples/audio-classification/run_audio_classification.py
+++ b/examples/audio-classification/run_audio_classification.py
@@ -47,7 +47,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")
 
diff --git a/examples/contrastive-image-text/requirements.txt b/examples/contrastive-image-text/requirements.txt
index 877a4cc85f..d1fff8c979 100644
--- a/examples/contrastive-image-text/requirements.txt
+++ b/examples/contrastive-image-text/requirements.txt
@@ -1 +1 @@
-datasets>=1.8.0
+datasets >= 1.8.0, <= 2.19.2
diff --git a/examples/contrastive-image-text/run_bridgetower.py b/examples/contrastive-image-text/run_bridgetower.py
index 0b98b7a0b2..67042a6191 100644
--- a/examples/contrastive-image-text/run_bridgetower.py
+++ b/examples/contrastive-image-text/run_bridgetower.py
@@ -59,7 +59,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
 
@@ -101,7 +101,7 @@ class ModelArguments:
         },
     )
     trust_remote_code: bool = field(
-        default=False,
+        default=True,
         metadata={
             "help": (
                 "Whether to trust the execution of code from datasets/models defined on the Hub."
diff --git a/examples/contrastive-image-text/run_clip.py b/examples/contrastive-image-text/run_clip.py
index 2e928ec3d6..a35b6cff66 100644
--- a/examples/contrastive-image-text/run_clip.py
+++ b/examples/contrastive-image-text/run_clip.py
@@ -62,7 +62,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
 
@@ -104,7 +104,7 @@ class ModelArguments:
         },
     )
     trust_remote_code: bool = field(
-        default=False,
+        default=True,
         metadata={
             "help": (
                 "Whether to trust the execution of code from datasets/models defined on the Hub."
diff --git a/examples/image-classification/requirements.txt b/examples/image-classification/requirements.txt
index 4cbf42532d..2336488f93 100644
--- a/examples/image-classification/requirements.txt
+++ b/examples/image-classification/requirements.txt
@@ -1,6 +1,6 @@
-torch>=1.5.0
-torchvision>=0.6.0
-datasets>=2.14.0
-evaluate
+torch >= 1.5.0
+torchvision >= 0.6.0
+datasets >= 2.14.0, <= 2.19.2
+evaluate == 0.4.3
 scikit-learn == 1.5.2
 timm>=0.9.16
diff --git a/examples/image-classification/run_image_classification.py b/examples/image-classification/run_image_classification.py
index a82428eb94..940bc19377 100644
--- a/examples/image-classification/run_image_classification.py
+++ b/examples/image-classification/run_image_classification.py
@@ -65,7 +65,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
 
diff --git a/examples/image-to-text/requirements.txt b/examples/image-to-text/requirements.txt
index 4abc5d3998..871e7ad665 100644
--- a/examples/image-to-text/requirements.txt
+++ b/examples/image-to-text/requirements.txt
@@ -3,4 +3,4 @@ Levenshtein
 sentencepiece != 0.1.92
 tiktoken
 blobfile
-datasets
+datasets == 3.6.0
diff --git a/examples/image-to-text/run_image2text_lora_finetune.py b/examples/image-to-text/run_image2text_lora_finetune.py
index 927d58749f..95307f229a 100644
--- a/examples/image-to-text/run_image2text_lora_finetune.py
+++ b/examples/image-to-text/run_image2text_lora_finetune.py
@@ -55,7 +55,7 @@ def check_optimum_habana_min_version(*a, **b):
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 def normalized_levenshtein(s1, s2):
diff --git a/examples/image-to-text/run_pipeline.py b/examples/image-to-text/run_pipeline.py
index 2c5cdbf3e4..6381cc1133 100644
--- a/examples/image-to-text/run_pipeline.py
+++ b/examples/image-to-text/run_pipeline.py
@@ -359,6 +359,7 @@ def main():
             model = AutoModelForVision2Seq.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype)
         if model_type == "mllama":
             model.language_model = initialize_distributed_model(args, model.language_model, logger, model_dtype)
+            model.to("hpu")
         else:
             model = initialize_distributed_model(args, model, logger, model_dtype)
         generator = pipeline(
diff --git a/examples/kubernetes/Dockerfile b/examples/kubernetes/Dockerfile
index 95d29b30bf..77c91c28bf 100644
--- a/examples/kubernetes/Dockerfile
+++ b/examples/kubernetes/Dockerfile
@@ -1,7 +1,7 @@
-ARG GAUDI_SW_VER=1.21.0
+ARG GAUDI_SW_VER=1.22.0
 ARG OS=ubuntu22.04
 ARG TORCH_VER=2.6.0
-ARG OPTIMUM_HABANA_VER=1.18.0
+ARG OPTIMUM_HABANA_VER=1.19.0
 
 FROM vault.habana.ai/gaudi-docker/${GAUDI_SW_VER}/${OS}/habanalabs/pytorch-installer-${TORCH_VER}:latest AS optimum-habana
 
diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md
index 8332098d18..a6b1ed0333 100644
--- a/examples/kubernetes/README.md
+++ b/examples/kubernetes/README.md
@@ -43,12 +43,12 @@ Use the following commands to build the containers:
 
 ```bash
 # Specify the Gaudi SW version, OS, and PyTorch version which will be used for the base container
-export GAUDI_SW_VER=1.21.0
+export GAUDI_SW_VER=1.22.0
 export OS=ubuntu22.04
 export TORCH_VER=2.6.0
 
 # Specify the version of optimum-habana to install in the container
-export OPTIMUM_HABANA_VER=1.18.0
+export OPTIMUM_HABANA_VER=1.19.0
 
 git clone https://github.com/huggingface/optimum-habana.git
 
diff --git a/examples/kubernetes/README.md.gotmpl b/examples/kubernetes/README.md.gotmpl
index d69e412208..534550e74e 100644
--- a/examples/kubernetes/README.md.gotmpl
+++ b/examples/kubernetes/README.md.gotmpl
@@ -43,12 +43,12 @@ Use the following commands to build the containers:
 
 ```bash
 # Specify the Gaudi SW version, OS, and PyTorch version which will be used for the base container
-export GAUDI_SW_VER=1.21.0
+export GAUDI_SW_VER=1.22.0
 export OS=ubuntu22.04
 export TORCH_VER=2.6.0
 
 # Specify the version of optimum-habana to install in the container
-export OPTIMUM_HABANA_VER=1.18.0
+export OPTIMUM_HABANA_VER=1.19.0
 
 git clone https://github.com/huggingface/optimum-habana.git
 
diff --git a/examples/kubernetes/docker-compose.yaml b/examples/kubernetes/docker-compose.yaml
index 8a6c6c2a6c..75844263cc 100644
--- a/examples/kubernetes/docker-compose.yaml
+++ b/examples/kubernetes/docker-compose.yaml
@@ -5,30 +5,30 @@ services:
         http_proxy: ${http_proxy:-""}
         https_proxy: ${https_proxy:-""}
         no_proxy: ${no_proxy:-""}
-        GAUDI_SW_VER: ${GAUDI_SW_VER:-1.21.0}
+        GAUDI_SW_VER: ${GAUDI_SW_VER:-1.22.0}
         OS: ${OS:-ubuntu22.04}
-        OPTIMUM_HABANA_VER:  ${OPTIMUM_HABANA_VER:-1.18.0}
+        OPTIMUM_HABANA_VER:  ${OPTIMUM_HABANA_VER:-1.19.0}
         TORCH_VER: ${TORCH_VER:-2.6.0}
         REGISTRY: ${REGISTRY}
         REPO: ${REPO}
       context: .
       labels:
-        org.opencontainers.base.name: "vault.habana.ai/gaudi-docker/${GAUDI_SW_VER:-1.21.0}/${OS:-ubuntu22.04}/habanalabs/pytorch-installer-${TORCH_VER:-2.6.0}:latest"
+        org.opencontainers.base.name: "vault.habana.ai/gaudi-docker/${GAUDI_SW_VER:-1.22.0}/${OS:-ubuntu22.04}/habanalabs/pytorch-installer-${TORCH_VER:-2.6.0}:latest"
         org.opencontainers.image.title: "Optimum for Intel® Gaudi® Accelerators"
-        org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.18.0}
+        org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.19.0}
     command: >
       sh -c "python -c 'from optimum import habana; print(\"optimum-habana:\", habana.__version__)'"
-    image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.18.0}
+    image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.19.0}
     pull_policy: always
   optimum-habana-examples:
     build:
       labels:
-        org.opencontainers.base.name: "${REGISTRY}/${REPO}:gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.18.0}"
+        org.opencontainers.base.name: "${REGISTRY}/${REPO}:gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.19.0}"
         org.opencontainers.image.title: "Optimum for Intel® Gaudi® Accelerators Examples"
-        org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.18.0}
+        org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.19.0}
       target: optimum-habana-examples
     command: >
       sh -c "python -c 'from optimum import habana; print(\"optimum-habana:\", habana.__version__)'"
     extends: optimum-habana
-    image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.18.0}
+    image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.19.0}
 
diff --git a/examples/language-modeling/requirements.txt b/examples/language-modeling/requirements.txt
index aa223dd7f6..a2558a3836 100644
--- a/examples/language-modeling/requirements.txt
+++ b/examples/language-modeling/requirements.txt
@@ -1,6 +1,6 @@
-datasets >= 2.14.0
+datasets >= 2.14.0, <= 2.19.2
 sentencepiece != 0.1.92
-protobuf
-evaluate
+protobuf == 3.20.3
+evaluate == 0.4.3
 scikit-learn == 1.5.2
 peft == 0.12.0
diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py
index 1c90b93ed3..373861ebf9 100644
--- a/examples/language-modeling/run_clm.py
+++ b/examples/language-modeling/run_clm.py
@@ -63,7 +63,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
diff --git a/examples/language-modeling/run_lora_clm.py b/examples/language-modeling/run_lora_clm.py
index 1d4d328139..b22eabba44 100644
--- a/examples/language-modeling/run_lora_clm.py
+++ b/examples/language-modeling/run_lora_clm.py
@@ -70,7 +70,7 @@ def check_optimum_habana_min_version(*a, **b):
 logger = logging.getLogger(__name__)
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 @dataclass
diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py
index 98741f2b4b..3c58cfaa47 100644
--- a/examples/language-modeling/run_mlm.py
+++ b/examples/language-modeling/run_mlm.py
@@ -62,7 +62,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
diff --git a/examples/language-modeling/run_multitask_prompt_tuning.py b/examples/language-modeling/run_multitask_prompt_tuning.py
index 1cd743a874..5ba6a2ca53 100644
--- a/examples/language-modeling/run_multitask_prompt_tuning.py
+++ b/examples/language-modeling/run_multitask_prompt_tuning.py
@@ -61,7 +61,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risk.
 check_min_version("4.49.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
diff --git a/examples/language-modeling/run_prompt_tuning_clm.py b/examples/language-modeling/run_prompt_tuning_clm.py
index 1a35196445..bef9984b70 100644
--- a/examples/language-modeling/run_prompt_tuning_clm.py
+++ b/examples/language-modeling/run_prompt_tuning_clm.py
@@ -63,7 +63,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.49.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
diff --git a/examples/multi-node-training/EFA/Dockerfile b/examples/multi-node-training/EFA/Dockerfile
index 5fe9c2386d..76b4a8cce6 100644
--- a/examples/multi-node-training/EFA/Dockerfile
+++ b/examples/multi-node-training/EFA/Dockerfile
@@ -1,4 +1,4 @@
-FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+FROM vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
 
 # Installs pdsh and upgrade pip
 RUN apt-get update && apt-get install -y pdsh && \
diff --git a/examples/multi-node-training/GaudiNIC/Dockerfile b/examples/multi-node-training/GaudiNIC/Dockerfile
index e3774c80b1..f7301380f5 100644
--- a/examples/multi-node-training/GaudiNIC/Dockerfile
+++ b/examples/multi-node-training/GaudiNIC/Dockerfile
@@ -1,4 +1,4 @@
-FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+FROM vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
 
 # Installs pdsh and upgrade pip
 RUN apt-get update && apt-get install -y pdsh && \
diff --git a/examples/protein-folding/requirements.txt b/examples/protein-folding/requirements.txt
index e3b2a3ba45..3521dca2ad 100644
--- a/examples/protein-folding/requirements.txt
+++ b/examples/protein-folding/requirements.txt
@@ -1,2 +1,2 @@
-datasets>=2.14.0
+datasets == 3.6.0
 scikit-learn == 1.5.2
diff --git a/examples/protein-folding/run_esmfold.py b/examples/protein-folding/run_esmfold.py
index 2e2003ab1b..94036a423e 100644
--- a/examples/protein-folding/run_esmfold.py
+++ b/examples/protein-folding/run_esmfold.py
@@ -40,7 +40,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 def convert_outputs_to_pdb(outputs):
diff --git a/examples/protein-folding/run_sequence_classification.py b/examples/protein-folding/run_sequence_classification.py
index 6c69e2f62e..a2aed8bc6a 100644
--- a/examples/protein-folding/run_sequence_classification.py
+++ b/examples/protein-folding/run_sequence_classification.py
@@ -41,7 +41,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
diff --git a/examples/protein-folding/run_zero_shot_eval.py b/examples/protein-folding/run_zero_shot_eval.py
index 83aa1c0ce0..3d3a4edadf 100644
--- a/examples/protein-folding/run_zero_shot_eval.py
+++ b/examples/protein-folding/run_zero_shot_eval.py
@@ -36,7 +36,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 logging.basicConfig(
diff --git a/examples/pytorch-image-models/requirements.txt b/examples/pytorch-image-models/requirements.txt
index c18d628ee5..4ad67f5781 100644
--- a/examples/pytorch-image-models/requirements.txt
+++ b/examples/pytorch-image-models/requirements.txt
@@ -1,2 +1,2 @@
 timm
-datasets
+datasets == 3.6.0
diff --git a/examples/question-answering/requirements.txt b/examples/question-answering/requirements.txt
index 09d7e4bc77..450d9a4cfc 100644
--- a/examples/question-answering/requirements.txt
+++ b/examples/question-answering/requirements.txt
@@ -1,3 +1,3 @@
-datasets >= 2.4.0
+datasets == 3.6.0
 torch >= 1.3.0
-evaluate
+evaluate == 0.4.3
diff --git a/examples/question-answering/run_qa.py b/examples/question-answering/run_qa.py
index 064717d80f..ba70d543d9 100644
--- a/examples/question-answering/run_qa.py
+++ b/examples/question-answering/run_qa.py
@@ -60,7 +60,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 
diff --git a/examples/question-answering/run_seq2seq_qa.py b/examples/question-answering/run_seq2seq_qa.py
index 374ec915ca..bd4ccf19a3 100644
--- a/examples/question-answering/run_seq2seq_qa.py
+++ b/examples/question-answering/run_seq2seq_qa.py
@@ -57,7 +57,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 
diff --git a/examples/sentence-transformers-training/nli/requirements.txt b/examples/sentence-transformers-training/nli/requirements.txt
index 680dc8a2bb..1b97e4c3d7 100644
--- a/examples/sentence-transformers-training/nli/requirements.txt
+++ b/examples/sentence-transformers-training/nli/requirements.txt
@@ -1,2 +1,2 @@
-datasets
+datasets <= 2.19.2
 peft
diff --git a/examples/sentence-transformers-training/paraphrases/requirements.txt b/examples/sentence-transformers-training/paraphrases/requirements.txt
index aee11b288a..b776a8dd19 100644
--- a/examples/sentence-transformers-training/paraphrases/requirements.txt
+++ b/examples/sentence-transformers-training/paraphrases/requirements.txt
@@ -1 +1 @@
-datasets
+datasets <= 2.19.2
diff --git a/examples/sentence-transformers-training/sts/requirements.txt b/examples/sentence-transformers-training/sts/requirements.txt
index 680dc8a2bb..1b97e4c3d7 100644
--- a/examples/sentence-transformers-training/sts/requirements.txt
+++ b/examples/sentence-transformers-training/sts/requirements.txt
@@ -1,2 +1,2 @@
-datasets
+datasets <= 2.19.2
 peft
diff --git a/examples/speech-recognition/README.md b/examples/speech-recognition/README.md
index 64fb1d1ebc..69625cc0ab 100644
--- a/examples/speech-recognition/README.md
+++ b/examples/speech-recognition/README.md
@@ -18,13 +18,19 @@ limitations under the License.
 
 ## Table of Contents
 
-- [Automatic Speech Recognition with CTC](#connectionist-temporal-classification)
-	- [Single HPU example](#single-hpu-ctc)
-	- [Multi HPU example](#multi-hpu-ctc)
-- [Automatic Speech Recognition with Sequence-to-Sequence](#sequence-to-sequence)
-	- [Whisper Model](#whisper-model)
-	- [Fine tuning](#single-hpu-whisper-fine-tuning-with-seq2seq)
-	- [Inference](#single-hpu-seq2seq-inference)
+- [Automatic Speech Recognition Examples](#automatic-speech-recognition-examples)
+  - [Table of Contents](#table-of-contents)
+  - [Requirements](#requirements)
+  - [Connectionist Temporal Classification](#connectionist-temporal-classification)
+    - [Single-HPU CTC](#single-hpu-ctc)
+    - [Multi-HPU CTC](#multi-hpu-ctc)
+  - [DeepSpeed](#deepspeed)
+  - [Inference](#inference)
+  - [Sequence to Sequence](#sequence-to-sequence)
+    - [Whisper Model](#whisper-model)
+    - [Single HPU Whisper Fine tuning with Seq2Seq](#single-hpu-whisper-fine-tuning-with-seq2seq)
+    - [Multi HPU Whisper Training with Seq2Seq](#multi-hpu-whisper-training-with-seq2seq)
+      - [Single HPU Seq2Seq Inference](#single-hpu-seq2seq-inference)
 
 
 ## Requirements
diff --git a/examples/speech-recognition/requirements.txt b/examples/speech-recognition/requirements.txt
index 67aeeaaa30..f5c8404aa4 100644
--- a/examples/speech-recognition/requirements.txt
+++ b/examples/speech-recognition/requirements.txt
@@ -1,5 +1,5 @@
-datasets >= 1.18.0
+datasets >= 1.18.0, <= 2.19.2
 numba==0.60.0
-librosa
-jiwer
-evaluate
+librosa == 0.10.2.post1
+jiwer == 3.0.4
+evaluate == 0.4.3
diff --git a/examples/speech-recognition/run_speech_recognition_ctc.py b/examples/speech-recognition/run_speech_recognition_ctc.py
index 3e40517af3..5afe55a335 100644
--- a/examples/speech-recognition/run_speech_recognition_ctc.py
+++ b/examples/speech-recognition/run_speech_recognition_ctc.py
@@ -59,7 +59,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
 
@@ -192,6 +192,10 @@ class DataTrainingArguments:
     dataset_name: str = field(
         metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
     )
+    dataset_dir: Optional[str] = field(
+        default=None,
+        metadata={"help": "Optional path to a local dataset directory (e.g. extracted LibriSpeech)."},
+    )
     dataset_config_name: str = field(
         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
     )
@@ -488,13 +492,18 @@ def main():
     # 1. First, let's load the dataset
     raw_datasets = DatasetDict()
 
-    raw_datasets["train"] = load_dataset(
-        data_args.dataset_name,
-        data_args.dataset_config_name,
-        split=data_args.train_split_name,
-        token=data_args.token,
-        trust_remote_code=data_args.trust_remote_code,
-    )
+    load_dataset_kwargs = {
+        "path": data_args.dataset_name,
+        "name": data_args.dataset_config_name,
+        "split": data_args.train_split_name,
+        "token": data_args.token,
+        "trust_remote_code": data_args.trust_remote_code,
+    }
+    if data_args.dataset_dir is not None:
+        load_dataset_kwargs["data_dir"] = data_args.dataset_dir
+        logger.info(f"Loading dataset from local cache directory: {data_args.dataset_dir}")
+
+    raw_datasets["train"] = load_dataset(**load_dataset_kwargs)
 
     if data_args.audio_column_name not in raw_datasets["train"].column_names:
         raise ValueError(
@@ -514,13 +523,8 @@ def main():
         raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples))
 
     if training_args.do_eval:
-        raw_datasets["eval"] = load_dataset(
-            data_args.dataset_name,
-            data_args.dataset_config_name,
-            split=data_args.eval_split_name,
-            token=data_args.token,
-            trust_remote_code=data_args.trust_remote_code,
-        )
+        load_dataset_kwargs["split"] = data_args.eval_split_name
+        raw_datasets["eval"] = load_dataset(**load_dataset_kwargs)
 
         if data_args.max_eval_samples is not None:
             raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples))
diff --git a/examples/speech-recognition/run_speech_recognition_seq2seq.py b/examples/speech-recognition/run_speech_recognition_seq2seq.py
index f52bd73887..562290413b 100755
--- a/examples/speech-recognition/run_speech_recognition_seq2seq.py
+++ b/examples/speech-recognition/run_speech_recognition_seq2seq.py
@@ -56,7 +56,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
 
diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
index f9753fe246..b4d4e46557 100644
--- a/examples/stable-diffusion/README.md
+++ b/examples/stable-diffusion/README.md
@@ -84,7 +84,7 @@ Stable Diffusion 3 was introduced by Stability AI [here](https://stability.ai/ne
 It uses Diffusion Transformer instead of UNet for denoising, which yields improved image quality.
 
 ```bash
-PT_HPU_LAZY_MODE=1 \
+PT_HPU_LAZY_MODE=1 PT_HPU_MAX_COMPOUND_OP_SIZE=1 \
 python text_to_image_generation.py \
     --model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers \
     --prompts "Sailing ship painting by Van Gogh" \
@@ -480,4 +480,4 @@ PT_HPU_LAZY_MODE=1 python text_to_video_generation.py \
  - **Batch Size Limitation**: Due to a known issue, batch sizes for some Stable Diffusion models need to be reduced.
    This issue is expected to be resolved in a future release.
 
-- **Image-to-Video ControlNet**: The Image-to-Video ControlNet command is currently not supported on Gaudi3.
+- **Image-to-Video ControlNet**: The Image-to-Video ControlNet command is currently not supported on Gaudi3.
\ No newline at end of file
diff --git a/examples/stable-diffusion/depth_to_image_generation.py b/examples/stable-diffusion/depth_to_image_generation.py
index fcd89257d1..0443bc5080 100755
--- a/examples/stable-diffusion/depth_to_image_generation.py
+++ b/examples/stable-diffusion/depth_to_image_generation.py
@@ -41,7 +41,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/image_to_image_generation.py b/examples/stable-diffusion/image_to_image_generation.py
index 9542931b11..f55c01a6f9 100755
--- a/examples/stable-diffusion/image_to_image_generation.py
+++ b/examples/stable-diffusion/image_to_image_generation.py
@@ -41,7 +41,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/image_to_video_generation.py b/examples/stable-diffusion/image_to_video_generation.py
index 3aacdb51a1..c2be57980a 100755
--- a/examples/stable-diffusion/image_to_video_generation.py
+++ b/examples/stable-diffusion/image_to_video_generation.py
@@ -38,7 +38,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json
new file mode 100644
index 0000000000..91a74c633c
--- /dev/null
+++ b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json
@@ -0,0 +1,18871 @@
+{
+    "GlobalRank": null,
+    "LocalRank": null,
+    "Mode": "DynamicRange",
+    "Nodes": {
+        "conv_in": {
+            "inputs": [
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.4765625
+                    ]
+                ]
+            }
+        },
+        "time_embedding.linear_1": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.171875
+                    ]
+                ]
+            }
+        },
+        "time_embedding.linear_2": {
+            "inputs": [
+                [
+                    [
+                        3.671875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1318359375
+                    ]
+                ]
+            }
+        },
+        "add_embedding.linear_1": {
+            "inputs": [
+                [
+                    [
+                        7.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        1.390625
+                    ]
+                ]
+            }
+        },
+        "add_embedding.linear_2": {
+            "inputs": [
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.0.resnets.0.conv1": {
+            "inputs": [
+                [
+                    [
+                        9.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.57421875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.0.resnets.0.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.57421875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.0.resnets.0.conv2": {
+            "inputs": [
+                [
+                    [
+                        7.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.486328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.0.resnets.1.conv1": {
+            "inputs": [
+                [
+                    [
+                        9.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.65234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.0.resnets.1.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.294921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.0.resnets.1.conv2": {
+            "inputs": [
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.41796875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.0.downsamplers.0.conv": {
+            "inputs": [
+                [
+                    [
+                        6.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.proj_in": {
+            "inputs": [
+                [
+                    [
+                        7.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.177734375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2265625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        9.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.19921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        9.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2314453125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        9.1875
+                    ]
+                ],
+                [
+                    [
+                        9.5625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        5.75
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        314.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        11.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1611328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.287109375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1279296875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10302734375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        107.5
+                    ]
+                ],
+                [
+                    [
+                        5.6875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        20.125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        856.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        7.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2255859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        32.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.3125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        10.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1865234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        10.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1826171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        10.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1826171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.3359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ],
+                [
+                    [
+                        9.5
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        6.21875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        376.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.33203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.154296875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        13.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2412109375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        97.0
+                    ]
+                ],
+                [
+                    [
+                        4.65625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        17.375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2576.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        9.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.240234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        70.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.291015625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.0.proj_out": {
+            "inputs": [
+                [
+                    [
+                        20.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1083984375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.proj_in": {
+            "inputs": [
+                [
+                    [
+                        11.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1748046875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.193359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1923828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.248046875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.96875
+                    ]
+                ],
+                [
+                    [
+                        7.90625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        199.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        12.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1337890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1533203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0986328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1884765625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        135.0
+                    ]
+                ],
+                [
+                    [
+                        4.125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.953125
+                    ]
+                ],
+                [
+                    [
+                        15.125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1864.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.953125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        19.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.193359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        9.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.201171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        9.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1591796875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.71875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.35546875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.875
+                    ]
+                ],
+                [
+                    [
+                        8.75
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        6.03125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        173.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        13.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1201171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09716796875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        8.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1943359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        152.0
+                    ]
+                ],
+                [
+                    [
+                        4.46875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        19.625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1792.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.201171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        33.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.22265625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.attentions.1.proj_out": {
+            "inputs": [
+                [
+                    [
+                        22.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.resnets.0.conv1": {
+            "inputs": [
+                [
+                    [
+                        15.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.8203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.resnets.0.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.resnets.0.conv2": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.271484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.resnets.0.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        19.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.251953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.resnets.1.conv1": {
+            "inputs": [
+                [
+                    [
+                        7.46875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        1.1953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.resnets.1.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.326171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.resnets.1.conv2": {
+            "inputs": [
+                [
+                    [
+                        7.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.322265625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.1.downsamplers.0.conv": {
+            "inputs": [
+                [
+                    [
+                        27.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.25390625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.proj_in": {
+            "inputs": [
+                [
+                    [
+                        9.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.3359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        3.359375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1455078125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        3.359375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        3.359375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.1875
+                    ]
+                ],
+                [
+                    [
+                        8.1875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        6.8125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        282.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09521484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2333984375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.119140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        12.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        104.0
+                    ]
+                ],
+                [
+                    [
+                        7.71875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        21.125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1904.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2158203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        23.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        4.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.134765625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        4.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        4.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1279296875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1689453125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        5.96875
+                    ]
+                ],
+                [
+                    [
+                        6.28125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        8.6875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        155.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        11.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10400390625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2353515625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.05322265625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        95.0
+                    ]
+                ],
+                [
+                    [
+                        5.40625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        21.75
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1192.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        2.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        18.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1748046875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1357421875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.142578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1533203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.375
+                    ]
+                ],
+                [
+                    [
+                        7.65625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        6.625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        194.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09912109375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1630859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12353515625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0439453125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        126.0
+                    ]
+                ],
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        20.5
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        980.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2119140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        22.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.53125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1513671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.5625
+                    ]
+                ],
+                [
+                    [
+                        7.71875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        7.96875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        188.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10888671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.169921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11767578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.53125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.049072265625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        122.0
+                    ]
+                ],
+                [
+                    [
+                        6.28125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        25.875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2528.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.671875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.193359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        26.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.13671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.123046875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1279296875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.119140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1337890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.0
+                    ]
+                ],
+                [
+                    [
+                        7.15625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        6.96875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        185.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        16.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0810546875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1357421875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1044921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.796875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.04638671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        110.0
+                    ]
+                ],
+                [
+                    [
+                        4.5625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        23.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1448.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.53125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        26.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1455078125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.130859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.126953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1259765625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12451171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.65625
+                    ]
+                ],
+                [
+                    [
+                        7.3125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        5.6875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        172.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        17.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08349609375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09716796875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09228515625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.34375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.040771484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        65.0
+                    ]
+                ],
+                [
+                    [
+                        4.09375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        26.25
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1104.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.890625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1533203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        22.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1240234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1220703125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1357421875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.21875
+                    ]
+                ],
+                [
+                    [
+                        6.5625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        5.3125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        149.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        20.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.080078125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06884765625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.049560546875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        1.21875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0260009765625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        43.25
+                    ]
+                ],
+                [
+                    [
+                        3.640625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ],
+                [
+                    [
+                        20.875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        940.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.18359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        22.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11962890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11767578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.71875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        5.9375
+                    ]
+                ],
+                [
+                    [
+                        5.75
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ],
+                [
+                    [
+                        5.03125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        145.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        18.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0751953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08740234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0693359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.296875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.039794921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        42.75
+                    ]
+                ],
+                [
+                    [
+                        3.734375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.953125
+                    ]
+                ],
+                [
+                    [
+                        24.125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        988.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.953125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.734375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        28.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.154296875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11865234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11083984375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.169921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        5.8125
+                    ]
+                ],
+                [
+                    [
+                        6.0625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ],
+                [
+                    [
+                        5.71875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        139.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        19.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.068359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.083984375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.059326171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        1.515625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.039794921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        49.75
+                    ]
+                ],
+                [
+                    [
+                        3.921875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ],
+                [
+                    [
+                        24.25
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1368.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1455078125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        24.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1796875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.130859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.126953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1240234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.65625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.0625
+                    ]
+                ],
+                [
+                    [
+                        6.53125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ],
+                [
+                    [
+                        5.5625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        129.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        16.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07275390625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.087890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08984375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.134765625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        63.5
+                    ]
+                ],
+                [
+                    [
+                        3.71875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        32.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1312.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        30.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.17578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.0.proj_out": {
+            "inputs": [
+                [
+                    [
+                        44.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11962890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.proj_in": {
+            "inputs": [
+                [
+                    [
+                        8.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.490234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        3.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        3.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.142578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        3.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12255859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2294921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.90625
+                    ]
+                ],
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        5.4375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        230.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        12.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10498046875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2060546875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2021484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        131.0
+                    ]
+                ],
+                [
+                    [
+                        6.9375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        23.375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1608.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.177734375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        34.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.251953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        5.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        5.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        5.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1787109375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.3125
+                    ]
+                ],
+                [
+                    [
+                        6.4375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ],
+                [
+                    [
+                        6.15625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        166.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        16.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2392578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1474609375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07568359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        117.0
+                    ]
+                ],
+                [
+                    [
+                        7.71875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        19.75
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1528.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.453125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1787109375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        30.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16015625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1728515625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.5625
+                    ]
+                ],
+                [
+                    [
+                        7.21875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        6.8125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        157.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        17.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.134765625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.224609375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        17.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07568359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        134.0
+                    ]
+                ],
+                [
+                    [
+                        9.5
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        19.875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1400.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.162109375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        46.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.142578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1337890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.40625
+                    ]
+                ],
+                [
+                    [
+                        6.53125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ],
+                [
+                    [
+                        5.875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        157.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        17.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1806640625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.142578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        13.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07373046875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        142.0
+                    ]
+                ],
+                [
+                    [
+                        8.0625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        20.125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1624.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.03125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        54.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1640625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.65625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.65625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1318359375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.65625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.34375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1513671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.15625
+                    ]
+                ],
+                [
+                    [
+                        5.71875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.96875
+                    ]
+                ],
+                [
+                    [
+                        4.84375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        152.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.96875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        16.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1201171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.212890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.142578125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        18.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.064453125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        139.0
+                    ]
+                ],
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        24.875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2304.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1923828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        87.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.166015625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1201171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12451171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1611328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        5.78125
+                    ]
+                ],
+                [
+                    [
+                        6.03125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        5.5625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        176.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.126953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.21484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.13671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        12.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06396484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        168.0
+                    ]
+                ],
+                [
+                    [
+                        6.71875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        26.875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1872.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1640625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        92.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.123046875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1201171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.3125
+                    ]
+                ],
+                [
+                    [
+                        6.3125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        5.3125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        166.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        12.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09033203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.169921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        28.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0576171875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        156.0
+                    ]
+                ],
+                [
+                    [
+                        4.625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        30.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2096.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1572265625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        111.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.130859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12255859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        5.875
+                    ]
+                ],
+                [
+                    [
+                        6.65625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9609375
+                    ]
+                ],
+                [
+                    [
+                        6.6875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        139.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9609375
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1396484375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.123046875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        20.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06298828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        142.0
+                    ]
+                ],
+                [
+                    [
+                        5.25
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        34.25
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2368.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1591796875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        107.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.46875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.13671875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.46875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.46875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1416015625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.34375
+                    ]
+                ],
+                [
+                    [
+                        7.875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.953125
+                    ]
+                ],
+                [
+                    [
+                        7.40625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        164.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.953125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0986328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1630859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.126953125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        17.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0595703125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        151.0
+                    ]
+                ],
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        30.125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        4160.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16796875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        98.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1533203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.130859375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1328125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1240234375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.625
+                    ]
+                ],
+                [
+                    [
+                        7.46875
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        6.625
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        223.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08349609375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11962890625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08984375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        16.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.055419921875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        119.0
+                    ]
+                ],
+                [
+                    [
+                        5.78125
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        39.5
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        3680.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.177734375
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        55.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16015625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.attentions.1.proj_out": {
+            "inputs": [
+                [
+                    [
+                        28.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07666015625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.resnets.0.conv1": {
+            "inputs": [
+                [
+                    [
+                        8.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.58203125
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.resnets.0.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.65625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.resnets.0.conv2": {
+            "inputs": [
+                [
+                    [
+                        7.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.29296875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.resnets.0.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        84.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.265625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.resnets.1.conv1": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.494140625
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.resnets.1.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.66796875
+                    ]
+                ]
+            }
+        },
+        "down_blocks.2.resnets.1.conv2": {
+            "inputs": [
+                [
+                    [
+                        9.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.3984375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.proj_in": {
+            "inputs": [
+                [
+                    [
+                        9.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.341796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        4.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        4.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1572265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        4.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.34375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2001953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ],
+                [
+                    [
+                        9.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        6.9375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        372.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        11.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09521484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1708984375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.052734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        85.0
+                    ]
+                ],
+                [
+                    [
+                        4.75
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        33.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        732.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.015625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.173828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        31.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.392578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1728515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1552734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1806640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.25
+                    ]
+                ],
+                [
+                    [
+                        10.875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        6.0625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        312.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        13.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1708984375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.13671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.05908203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        88.0
+                    ]
+                ],
+                [
+                    [
+                        6.84375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        20.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        864.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        22.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.21875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.21875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1650390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.21875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.228515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.28125
+                    ]
+                ],
+                [
+                    [
+                        8.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        6.84375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        236.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        16.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1396484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.29296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.51953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.53125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        108.0
+                    ]
+                ],
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        20.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        840.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.640625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.193359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        34.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2392578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1552734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1611328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.220703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.375
+                    ]
+                ],
+                [
+                    [
+                        9.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        231.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        18.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1533203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        127.0
+                    ]
+                ],
+                [
+                    [
+                        6.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        18.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        924.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1728515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        44.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.197265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1591796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1513671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.318359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.71875
+                    ]
+                ],
+                [
+                    [
+                        7.78125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        7.15625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        190.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        19.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12060546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1689453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.21875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.072265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        106.5
+                    ]
+                ],
+                [
+                    [
+                        4.96875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        20.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        888.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.34375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1845703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        54.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2275390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.154296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.294921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.625
+                    ]
+                ],
+                [
+                    [
+                        7.59375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        7.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        204.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        20.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1162109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.17578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1279296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06787109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        96.5
+                    ]
+                ],
+                [
+                    [
+                        4.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        22.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1168.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        53.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.197265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1396484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.353515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.0
+                    ]
+                ],
+                [
+                    [
+                        7.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        7.40625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        198.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        20.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11572265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.177734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.123046875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.703125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.061767578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        98.0
+                    ]
+                ],
+                [
+                    [
+                        3.96875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        25.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2144.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.71875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1689453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        56.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.216796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1572265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        9.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1630859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        9.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.30859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.25
+                    ]
+                ],
+                [
+                    [
+                        7.5625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        6.65625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        171.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        20.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1220703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        13.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.052734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        93.5
+                    ]
+                ],
+                [
+                    [
+                        4.46875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        30.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1784.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.236328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        51.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.337890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1416015625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        9.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        9.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.134765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.3359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.59375
+                    ]
+                ],
+                [
+                    [
+                        7.15625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        8.8125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        143.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        19.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0947265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.71875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09130859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        138.0
+                    ]
+                ],
+                [
+                    [
+                        5.46875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        39.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        5216.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.29296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        50.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.33984375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        10.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16015625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        10.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        10.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.302734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.71875
+                    ]
+                ],
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        7.1875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        151.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        23.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11181640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0810546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.080078125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        91.0
+                    ]
+                ],
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        31.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        3392.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2021484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        46.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.474609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.0.proj_out": {
+            "inputs": [
+                [
+                    [
+                        162.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.proj_in": {
+            "inputs": [
+                [
+                    [
+                        8.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.28515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        5.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1533203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        5.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1611328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        5.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.162109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.31640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.125
+                    ]
+                ],
+                [
+                    [
+                        9.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        6.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        228.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        12.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09521484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        2.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        79.5
+                    ]
+                ],
+                [
+                    [
+                        4.59375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        20.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        648.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.220703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        22.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.259765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1572265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1455078125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.71875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1845703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.5625
+                    ]
+                ],
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9765625
+                    ]
+                ],
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        241.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9765625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        13.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1591796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1982421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06884765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        65.0
+                    ]
+                ],
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        25.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        684.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        7.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        23.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.248046875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.154296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.185546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.03125
+                    ]
+                ],
+                [
+                    [
+                        7.75
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ],
+                [
+                    [
+                        7.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        184.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        15.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1630859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.181640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.53125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        126.0
+                    ]
+                ],
+                [
+                    [
+                        6.34375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        24.375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        904.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.71875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2236328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        45.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.20703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1669921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1474609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2216796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.9375
+                    ]
+                ],
+                [
+                    [
+                        9.0625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        7.3125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        182.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        15.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.201171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1689453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06787109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        154.0
+                    ]
+                ],
+                [
+                    [
+                        6.65625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        18.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1360.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.275390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        86.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2431640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1572265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2353515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.3125
+                    ]
+                ],
+                [
+                    [
+                        8.375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        7.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        173.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        17.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1259765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.193359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06396484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        118.5
+                    ]
+                ],
+                [
+                    [
+                        5.40625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        19.375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1072.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2099609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        39.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1982421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1455078125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1474609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2490234375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.3125
+                    ]
+                ],
+                [
+                    [
+                        7.3125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9765625
+                    ]
+                ],
+                [
+                    [
+                        7.65625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        194.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9765625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        17.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1669921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0732421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        117.5
+                    ]
+                ],
+                [
+                    [
+                        4.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        23.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1816.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.26171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        43.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1923828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        10.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.162109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        10.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.150390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        10.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.345703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.09375
+                    ]
+                ],
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        204.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        17.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1220703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1337890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.142578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0634765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        199.0
+                    ]
+                ],
+                [
+                    [
+                        5.40625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        22.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2320.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.71875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.19140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        53.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.17578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        9.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.162109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        9.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.267578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.875
+                    ]
+                ],
+                [
+                    [
+                        8.1875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        211.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        16.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1005859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1279296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.059326171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        144.0
+                    ]
+                ],
+                [
+                    [
+                        5.28125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        25.75
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2640.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2275390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        56.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.19140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        10.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        10.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1611328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        10.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1611328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2099609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.9375
+                    ]
+                ],
+                [
+                    [
+                        7.875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        9.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        201.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        16.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09423828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11572265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.03125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11474609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        123.5
+                    ]
+                ],
+                [
+                    [
+                        4.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        28.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2944.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        6.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.228515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        55.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.27734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        11.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        11.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1572265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        11.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.25
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.625
+                    ]
+                ],
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        9.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        136.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        17.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1220703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.103515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.076171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        97.5
+                    ]
+                ],
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        34.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2336.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        43.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.37890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.1.proj_out": {
+            "inputs": [
+                [
+                    [
+                        120.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11865234375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.proj_in": {
+            "inputs": [
+                [
+                    [
+                        12.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.251953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        4.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1630859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        4.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1982421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        4.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.130859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.5390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.59375
+                    ]
+                ],
+                [
+                    [
+                        8.875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        5.0625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        208.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        10.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10009765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.341796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.259765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        2.421875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2392578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        116.5
+                    ]
+                ],
+                [
+                    [
+                        4.75
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        31.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1080.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.19140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        16.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.365234375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        4.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        4.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        4.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.126953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.26171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.34375
+                    ]
+                ],
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        5.90625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        227.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        10.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1689453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.55078125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        2.71875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07373046875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        80.5
+                    ]
+                ],
+                [
+                    [
+                        6.34375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        42.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1056.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.185546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        23.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.244140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        5.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1611328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        5.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1591796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        5.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.263671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.84375
+                    ]
+                ],
+                [
+                    [
+                        8.9375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        6.78125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        227.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        11.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.5
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.412109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.703125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08056640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        98.0
+                    ]
+                ],
+                [
+                    [
+                        4.9375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        49.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        676.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.173828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        35.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2392578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        5.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1650390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        5.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        5.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.34765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.625
+                    ]
+                ],
+                [
+                    [
+                        8.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        5.90625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        189.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        12.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1513671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.4921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.203125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06494140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        105.5
+                    ]
+                ],
+                [
+                    [
+                        4.75
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        19.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        924.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1982421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        37.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.162109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.169921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.494140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.09375
+                    ]
+                ],
+                [
+                    [
+                        7.59375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        6.1875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        163.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        13.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10791015625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1728515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.18359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.109375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07958984375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        76.0
+                    ]
+                ],
+                [
+                    [
+                        4.71875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        17.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        648.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.19921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        52.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.28125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1650390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.337890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.71875
+                    ]
+                ],
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        5.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        187.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0947265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.134765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10205078125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.234375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.05810546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        79.0
+                    ]
+                ],
+                [
+                    [
+                        4.21875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        25.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        828.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1826171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        28.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.19140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1826171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.18359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1376953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.30859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.3125
+                    ]
+                ],
+                [
+                    [
+                        6.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        154.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0810546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.11865234375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08154296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        2.453125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.04638671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        60.25
+                    ]
+                ],
+                [
+                    [
+                        4.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        34.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1480.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.84375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.18359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        50.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.220703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.19140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1513671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.03125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.32421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.09375
+                    ]
+                ],
+                [
+                    [
+                        8.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        9.0625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        177.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0869140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1318359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08642578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        2.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0419921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        61.75
+                    ]
+                ],
+                [
+                    [
+                        3.59375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        20.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1536.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1669921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        43.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.38671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1806640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.208984375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1337890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.65625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2119140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.90625
+                    ]
+                ],
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ],
+                [
+                    [
+                        10.6875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        211.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98828125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        17.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08447265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0751953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.043701171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        70.5
+                    ]
+                ],
+                [
+                    [
+                        3.8125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ],
+                [
+                    [
+                        38.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1952.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1669921875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        42.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.298828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1640625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.251953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2470703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.96875
+                    ]
+                ],
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        5.78125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        188.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        19.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1787109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.134765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07763671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        80.0
+                    ]
+                ],
+                [
+                    [
+                        4.28125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        46.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        3088.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        5.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1845703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        43.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.5625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.attentions.2.proj_out": {
+            "inputs": [
+                [
+                    [
+                        174.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.0.conv1": {
+            "inputs": [
+                [
+                    [
+                        10.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.435546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.0.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.62109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.0.conv2": {
+            "inputs": [
+                [
+                    [
+                        13.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.384765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.0.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        59.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09228515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.1.conv1": {
+            "inputs": [
+                [
+                    [
+                        11.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.53515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.1.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.76171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.1.conv2": {
+            "inputs": [
+                [
+                    [
+                        11.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.50390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.1.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        94.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.162109375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.2.conv1": {
+            "inputs": [
+                [
+                    [
+                        9.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.91015625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.2.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.82421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.2.conv2": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.26953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.resnets.2.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        94.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1396484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.0.upsamplers.0.conv": {
+            "inputs": [
+                [
+                    [
+                        67.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.400390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.proj_in": {
+            "inputs": [
+                [
+                    [
+                        6.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.26171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.20703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.197265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1923828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.34765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        9.125
+                    ]
+                ],
+                [
+                    [
+                        10.125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ],
+                [
+                    [
+                        7.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        268.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1376953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2236328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0751953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1650390625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        91.5
+                    ]
+                ],
+                [
+                    [
+                        4.1875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        22.875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        836.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        7.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1982421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        29.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.380859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        12.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.201171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        12.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        12.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.380859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        9.875
+                    ]
+                ],
+                [
+                    [
+                        11.5625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        7.25
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        412.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        16.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1357421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.244140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10498046875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        17.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.314453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        159.0
+                    ]
+                ],
+                [
+                    [
+                        4.59375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        27.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1336.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        9.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1962890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        33.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.201171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.0.proj_out": {
+            "inputs": [
+                [
+                    [
+                        34.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.23828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.proj_in": {
+            "inputs": [
+                [
+                    [
+                        7.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.205078125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.23046875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.228515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.173828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        9.125
+                    ]
+                ],
+                [
+                    [
+                        9.3125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        272.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        13.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1357421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1982421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0751953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        9.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        108.0
+                    ]
+                ],
+                [
+                    [
+                        4.9375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        21.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        984.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        7.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.251953125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        23.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2099609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        9.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2138671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        9.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2099609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.28125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.326171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        9.5
+                    ]
+                ],
+                [
+                    [
+                        10.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        6.625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        508.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        13.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.22265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12451171875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        10.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.291015625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        141.0
+                    ]
+                ],
+                [
+                    [
+                        4.65625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        22.875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1376.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        7.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.248046875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        68.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2421875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.1.proj_out": {
+            "inputs": [
+                [
+                    [
+                        21.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.proj_in": {
+            "inputs": [
+                [
+                    [
+                        9.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2255859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        10.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.23046875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        10.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2314453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        10.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1630859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.298828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.875
+                    ]
+                ],
+                [
+                    [
+                        9.5625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        6.90625
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        243.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1318359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.255859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12353515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        14.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.203125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        170.0
+                    ]
+                ],
+                [
+                    [
+                        3.4375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        24.375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        968.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        8.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.396484375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        15.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.318359375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2236328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        9.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.263671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        9.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1748046875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.427734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.125
+                    ]
+                ],
+                [
+                    [
+                        10.375
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ],
+                [
+                    [
+                        6.53125
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        314.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.98046875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        14.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.29296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1591796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        16.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.298828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        117.0
+                    ]
+                ],
+                [
+                    [
+                        3.671875
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        37.5
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1448.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        7.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.23828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        29.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.30859375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.attentions.2.proj_out": {
+            "inputs": [
+                [
+                    [
+                        22.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1591796875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.0.conv1": {
+            "inputs": [
+                [
+                    [
+                        12.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        1.0078125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.0.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1552734375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.0.conv2": {
+            "inputs": [
+                [
+                    [
+                        11.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.59375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.0.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        146.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2236328125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.1.conv1": {
+            "inputs": [
+                [
+                    [
+                        7.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        1.015625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.1.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.470703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.1.conv2": {
+            "inputs": [
+                [
+                    [
+                        7.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.5546875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.1.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        42.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.24609375
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.2.conv1": {
+            "inputs": [
+                [
+                    [
+                        8.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.64453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.2.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.7578125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.2.conv2": {
+            "inputs": [
+                [
+                    [
+                        8.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.263671875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.resnets.2.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        31.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1259765625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.1.upsamplers.0.conv": {
+            "inputs": [
+                [
+                    [
+                        20.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1923828125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.0.conv1": {
+            "inputs": [
+                [
+                    [
+                        9.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.87890625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.0.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.296875
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.0.conv2": {
+            "inputs": [
+                [
+                    [
+                        12.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.90625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.0.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        54.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.3515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.1.conv1": {
+            "inputs": [
+                [
+                    [
+                        9.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.1.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1572265625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.1.conv2": {
+            "inputs": [
+                [
+                    [
+                        12.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.66015625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.1.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        25.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.2314453125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.2.conv1": {
+            "inputs": [
+                [
+                    [
+                        21.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.78515625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.2.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1845703125
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.2.conv2": {
+            "inputs": [
+                [
+                    [
+                        9.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.9140625
+                    ]
+                ]
+            }
+        },
+        "up_blocks.2.resnets.2.conv_shortcut": {
+            "inputs": [
+                [
+                    [
+                        13.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.25
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.proj_in": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.298828125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        3.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.140625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        3.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        3.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1328125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1923828125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.0625
+                    ]
+                ],
+                [
+                    [
+                        8.375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        6.4375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        274.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        11.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07470703125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12109375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1064453125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        3.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.046875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        83.0
+                    ]
+                ],
+                [
+                    [
+                        4.59375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        22.625
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1216.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        2.640625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1806640625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.0.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        20.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1630859375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.53125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.53125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1533203125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.53125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1494140625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ],
+                [
+                    [
+                        9.6875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        7.59375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        215.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        12.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07958984375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1259765625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1064453125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.78125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.042236328125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        83.5
+                    ]
+                ],
+                [
+                    [
+                        5.21875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        23.75
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1120.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.484375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16015625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.1.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        22.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.142578125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.140625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1767578125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.09375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.90625
+                    ]
+                ],
+                [
+                    [
+                        9.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ],
+                [
+                    [
+                        7.03125
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        233.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        13.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.083984375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1220703125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10546875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        5.59375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0439453125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        72.0
+                    ]
+                ],
+                [
+                    [
+                        4.03125
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        24.75
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        864.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.546875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1611328125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.2.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        28.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1376953125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1513671875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.9375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1337890625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.4375
+                    ]
+                ],
+                [
+                    [
+                        8.875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        7.75
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        219.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        12.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06298828125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.10791015625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07958984375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        13.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.03515625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        62.0
+                    ]
+                ],
+                [
+                    [
+                        3.921875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        24.25
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1184.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.34375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.154296875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.3.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        29.875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1416015625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        8.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1455078125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        8.1875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.15234375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.4375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1708984375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        8.375
+                    ]
+                ],
+                [
+                    [
+                        9.125
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        212.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        11.8125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0576171875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.09619140625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0693359375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        2.359375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0322265625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        60.5
+                    ]
+                ],
+                [
+                    [
+                        3.640625
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        32.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1080.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        3.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.158203125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.4.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        37.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.126953125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1484375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        7.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.138671875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.150390625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        6.96875
+                    ]
+                ],
+                [
+                    [
+                        8.4375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        7.375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        189.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        9.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0556640625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08642578125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.06591796875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        2.546875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.033935546875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        47.5
+                    ]
+                ],
+                [
+                    [
+                        3.46875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        27.25
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1072.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.34375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.16796875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.5.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        34.25
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1552734375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1357421875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1318359375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.123046875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1240234375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.0
+                    ]
+                ],
+                [
+                    [
+                        8.125
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        7.03125
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        175.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        8.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.059814453125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08349609375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.055908203125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        6.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.030517578125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        61.75
+                    ]
+                ],
+                [
+                    [
+                        3.90625
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ],
+                [
+                    [
+                        31.5
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1528.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.984375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.46875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.6.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        44.5
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1396484375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.12890625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        6.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.126953125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        6.15625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1416015625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        7.21875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.03125
+                    ]
+                ],
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        8.5
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        153.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        6.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.054443359375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.07958984375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.05029296875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        10.375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.031982421875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        57.25
+                    ]
+                ],
+                [
+                    [
+                        3.40625
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        30.25
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        1080.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1455078125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.7.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        42.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.134765625
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1318359375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        5.625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        8.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1484375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.84375
+                    ]
+                ],
+                [
+                    [
+                        8.625
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ],
+                [
+                    [
+                        11.375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        179.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.99609375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        5.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.05517578125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0830078125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.052734375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        1.7109375
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.0296630859375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        61.0
+                    ]
+                ],
+                [
+                    [
+                        3.59375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ],
+                [
+                    [
+                        26.75
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2016.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.9921875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.17578125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.8.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        56.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1435546875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn1.to_q": {
+            "inputs": [
+                [
+                    [
+                        4.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1298828125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn1.to_k": {
+            "inputs": [
+                [
+                    [
+                        4.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1240234375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn1.to_v": {
+            "inputs": [
+                [
+                    [
+                        4.96875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1396484375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        4.90625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.14453125
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        7.15625
+                    ]
+                ],
+                [
+                    [
+                        7.71875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ],
+                [
+                    [
+                        8.25
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        153.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        1.0
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn2.to_q": {
+            "inputs": [
+                [
+                    [
+                        5.0625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.046630859375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn2.to_k": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.08740234375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn2.to_v": {
+            "inputs": [
+                [
+                    [
+                        852.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.04248046875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0": {
+            "inputs": [
+                [
+                    [
+                        1.0078125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.026123046875
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": {
+            "inputs": [
+                [
+                    [
+                        63.0
+                    ]
+                ],
+                [
+                    [
+                        3.671875
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": {
+            "inputs": [
+                [
+                    [
+                        0.90234375
+                    ]
+                ],
+                [
+                    [
+                        28.625
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": {
+            "inputs": [
+                [
+                    [
+                        2224.0
+                    ]
+                ]
+            ],
+            "outputs": [
+                [
+                    [
+                        0.90234375
+                    ]
+                ]
+            ]
+        },
+        "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj": {
+            "inputs": [
+                [
+                    [
+                        4.40625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1552734375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.transformer_blocks.9.ff.net.2": {
+            "inputs": [
+                [
+                    [
+                        51.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.146484375
+                    ]
+                ]
+            }
+        },
+        "mid_block.attentions.0.proj_out": {
+            "inputs": [
+                [
+                    [
+                        26.0
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.1396484375
+                    ]
+                ]
+            }
+        },
+        "mid_block.resnets.0.conv1": {
+            "inputs": [
+                [
+                    [
+                        9.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.41015625
+                    ]
+                ]
+            }
+        },
+        "mid_block.resnets.0.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.53515625
+                    ]
+                ]
+            }
+        },
+        "mid_block.resnets.0.conv2": {
+            "inputs": [
+                [
+                    [
+                        11.125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.71484375
+                    ]
+                ]
+            }
+        },
+        "mid_block.resnets.1.conv1": {
+            "inputs": [
+                [
+                    [
+                        8.5625
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.482421875
+                    ]
+                ]
+            }
+        },
+        "mid_block.resnets.1.time_emb_proj": {
+            "inputs": [
+                [
+                    [
+                        7.6875
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.640625
+                    ]
+                ]
+            }
+        },
+        "mid_block.resnets.1.conv2": {
+            "inputs": [
+                [
+                    [
+                        13.3125
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.61328125
+                    ]
+                ]
+            }
+        },
+        "conv_out": {
+            "inputs": [
+                [
+                    [
+                        11.75
+                    ]
+                ]
+            ],
+            "params": {
+                "weight": [
+                    [
+                        0.21875
+                    ]
+                ]
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz
new file mode 100644
index 0000000000..2e6ad5c196
Binary files /dev/null and b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz differ
diff --git a/examples/stable-diffusion/quantization/measure_config.json b/examples/stable-diffusion/quantization/measure_config.json
new file mode 100755
index 0000000000..04576eeb46
--- /dev/null
+++ b/examples/stable-diffusion/quantization/measure_config.json
@@ -0,0 +1,6 @@
+{
+    "method": "HOOKS",
+    "mode": "MEASURE",
+    "observer": "maxabs",
+    "dump_stats_path": "./quantization/measure/fp8"
+}
diff --git a/examples/stable-diffusion/quantization/quant_config.json b/examples/stable-diffusion/quantization/quant_config.json
new file mode 100755
index 0000000000..b372905d7f
--- /dev/null
+++ b/examples/stable-diffusion/quantization/quant_config.json
@@ -0,0 +1,7 @@
+{
+    "method": "HOOKS",
+    "mode": "QUANTIZE",
+    "observer": "maxabs",
+    "scale_method": "maxabs_hw",
+    "dump_stats_path": "./quantization/measure/fp8"
+}
\ No newline at end of file
diff --git a/examples/stable-diffusion/requirements.txt b/examples/stable-diffusion/requirements.txt
index ed24d8c1b7..a8e8750e3f 100644
--- a/examples/stable-diffusion/requirements.txt
+++ b/examples/stable-diffusion/requirements.txt
@@ -1,3 +1,3 @@
-opencv-python
+opencv-python == 4.10.0.84
 compel
 sentencepiece
diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py
index e3b0beed48..aac565dcd5 100755
--- a/examples/stable-diffusion/text_to_image_generation.py
+++ b/examples/stable-diffusion/text_to_image_generation.py
@@ -42,7 +42,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/text_to_video_generation.py b/examples/stable-diffusion/text_to_video_generation.py
index 144727cbc1..5ab6bf8697 100755
--- a/examples/stable-diffusion/text_to_video_generation.py
+++ b/examples/stable-diffusion/text_to_video_generation.py
@@ -37,7 +37,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 
 logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/training/requirements.txt b/examples/stable-diffusion/training/requirements.txt
index 7f7e4a2d0e..9a419fde22 100644
--- a/examples/stable-diffusion/training/requirements.txt
+++ b/examples/stable-diffusion/training/requirements.txt
@@ -1,6 +1,6 @@
 compel
-datasets
-imagesize
+datasets == 3.6.0
+imagesize == 1.4.1
 opencv-python
 peft==0.16.0
 sentencepiece
diff --git a/examples/stable-diffusion/training/train_controlnet.py b/examples/stable-diffusion/training/train_controlnet.py
index d6c1a391e9..cb199802bf 100755
--- a/examples/stable-diffusion/training/train_controlnet.py
+++ b/examples/stable-diffusion/training/train_controlnet.py
@@ -67,7 +67,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 # Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 if is_wandb_available():
     import wandb
 
diff --git a/examples/stable-diffusion/unconditional_image_generation.py b/examples/stable-diffusion/unconditional_image_generation.py
index 979f60b838..174ea398b4 100755
--- a/examples/stable-diffusion/unconditional_image_generation.py
+++ b/examples/stable-diffusion/unconditional_image_generation.py
@@ -20,7 +20,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 # Setup logging
 logging.basicConfig(
diff --git a/examples/summarization/requirements.txt b/examples/summarization/requirements.txt
index 7f9dc2a9c4..8cbb65a9b4 100644
--- a/examples/summarization/requirements.txt
+++ b/examples/summarization/requirements.txt
@@ -1,8 +1,8 @@
-datasets >= 2.4.0
+datasets >= 2.4.0, <= 2.19.2
 sentencepiece != 0.1.92
-protobuf
-rouge-score
-nltk
-py7zr
+protobuf == 3.20.3
+rouge-score == 0.1.2
+nltk == 3.8.1
+py7zr == 0.21.0
 torch >= 1.3
-evaluate
+evaluate == 0.4.3
diff --git a/examples/summarization/run_summarization.py b/examples/summarization/run_summarization.py
index 87e5faa9d1..58aab7db09 100755
--- a/examples/summarization/run_summarization.py
+++ b/examples/summarization/run_summarization.py
@@ -65,7 +65,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
 
diff --git a/examples/text-classification/requirements.txt b/examples/text-classification/requirements.txt
index 7ce7d0ba42..4890b36c21 100644
--- a/examples/text-classification/requirements.txt
+++ b/examples/text-classification/requirements.txt
@@ -1,7 +1,8 @@
-datasets >= 2.4.0
+datasets == 3.6.0
 sentencepiece != 0.1.92
-scipy
+scipy == 1.13.1
 scikit-learn == 1.5.2
-protobuf
+protobuf == 5.29.4
+tensorboard == 2.19.0
 torch >= 1.3
-evaluate
+evaluate == 0.4.3
diff --git a/examples/text-classification/run_glue.py b/examples/text-classification/run_glue.py
index be36e601ad..dc641838c5 100755
--- a/examples/text-classification/run_glue.py
+++ b/examples/text-classification/run_glue.py
@@ -58,7 +58,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
 
diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md
index 73e9ad0e35..8f60425907 100755
--- a/examples/text-generation/README.md
+++ b/examples/text-generation/README.md
@@ -816,13 +816,7 @@ pip install -r requirements_lm_eval.txt
 ```
 
 > [!NOTE]
-> Please add the flags for following models to improve accuracy when using lm_eval on gaudi2. Please note this is a workaround for 1.20 release only.
->
-> ENABLE_LB_BUNDLE_ALL_COMPUTE_MME=0 COMPLEXGUID_DISABLE_RMS_NORM=true ENABLE_EXPERIMENTAL_FLAGS=true for llama-2-70b-hf[PTQ fp8]
->
-> COMPLEXGUID_DISABLE_RMS_NORM=true ENABLE_EXPERIMENTAL_FLAGS=true for Llama-3.1-70B-Instruct[PTQ fp8] and llama-2-70b-hf[bf16]
->
-> If custom models on hub is being used, please set env variable HF_DATASETS_TRUST_REMOTE_CODE=true instead of arg --trust_remote_code with the installed lm_eval version and dependency datasets==3.6.0
+> If custom models on the hub are being used, please set the env variable HF_DATASETS_TRUST_REMOTE_CODE=true instead of the arg --trust_remote_code with the installed lm_eval version and dependency datasets==3.6.0
 
 
 ### Examples
diff --git a/examples/text-generation/model_adapter.py b/examples/text-generation/model_adapter.py
index e8653a2431..d492aa6505 100644
--- a/examples/text-generation/model_adapter.py
+++ b/examples/text-generation/model_adapter.py
@@ -18,15 +18,23 @@
 ###############################################################################
 
 import argparse
-from typing import Literal, Optional
+import logging
+from typing import List, Literal, Optional, Union
 
 import torch
 import torch.nn.functional as F
+from lm_eval.api.instance import Instance
 from lm_eval.models.huggingface import HFLM, TemplateLM
+from lm_eval.models.utils import get_dtype, stop_sequences_criteria
+
+# Local imports
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation import GenerationConfig
 
 
+logger = logging.getLogger(__name__)
+
+
 class HabanaModelAdapter(HFLM):
     def __init__(
         self,
@@ -35,10 +43,18 @@ def __init__(
         args: argparse.Namespace,
         options: GenerationConfig,
         backend: Literal["default", "causal", "seq2seq"] = "default",
+        truncation: Optional[bool] = False,
         logits_cache: bool = True,
+        max_length: Optional[int] = None,
+        softmax_dtype: Union[str, torch.dtype, None] = None,
         add_bos_token: Optional[bool] = True,
         prefix_token_id: Optional[int] = None,
         delta: Optional[str] = None,
+        # end token for thinking, either the string or int token id.
+        # splits to get response after this token (if provided).
+        think_end_token: Optional[Union[str, int]] = None,
+        enable_thinking: Optional[bool] = None,
+        chat_template_args: Optional[dict] = None,
         **kwargs,
     ) -> None:
         # To skip cuda code of the HFLM init
@@ -54,11 +70,32 @@ def __init__(
         self.peft = args.peft_model
         self.delta = delta
         self.custom_prefix_token_id = prefix_token_id
+        if isinstance(think_end_token, str) and think_end_token.isdigit():
+            self.think_end_token = int(think_end_token)
+        else:
+            self.think_end_token = think_end_token
+
+        self.chat_template_args = chat_template_args or {}
+        if enable_thinking is not None:
+            self.chat_template_args.update({"enable_thinking": enable_thinking})
+
         # determine which of 'causal' and 'seq2seq' backends to use for HF models
         self._get_backend(config=self._config, backend=backend, trust_remote_code=args.trust_remote_code)
+        self.truncation = truncation
         self.logits_cache = logits_cache
         self.add_bos_token = add_bos_token
-        self._max_length = options.max_length
+        self._max_length = max_length
+        self.softmax_dtype = get_dtype(softmax_dtype) if softmax_dtype is not None else None
+        self.hpu_graphs = args.use_hpu_graphs
+        self.use_lazy_mode = True
+        if args.torch_compile:
+            self.use_lazy_mode = False
+        self.vocab_size = self._model.config.vocab_size
+        if "gemma" in getattr(self._config, "model_type", ""):
+            self.add_bos_token = True
+            logger.info(
+                f"Model type is '{self._config.model_type}', part of the Gemma family--a BOS token will be used as Gemma underperforms without it."
+            )
         self.batch_size_per_gpu = int(args.batch_size)
         self.revision = args.model_revision
         self.model_inputs = {"use_cache": self.options.use_cache}
@@ -119,7 +156,8 @@ def eot_token_id(self) -> int:
 
     @property
     def max_length(self) -> int:
-        return self.buckets[-1]
+        # Legacy
+        return self._max_length if self._max_length else self.buckets[-1]
 
     @property
     def device(self):
@@ -127,8 +165,18 @@ def device(self):
         # Returning 'cpu' to keep tensors on CPU in lm_eval code
         return "cpu"
 
-    def find_bucket(self, length: int) -> list[int]:
-        return [b for b in self.buckets if b >= length][0]
+    @max_length.setter
+    def max_length(self, value: int) -> None:
+        self._max_length = value  # a falsy value makes the getter fall back to the last bucket (self.buckets[-1])
+
+    def find_bucket(self, length: int, key=lambda b, length: b >= length) -> int:
+        # First bucket accepted by `key`; if none is, `length` becomes a new bucket (list kept sorted).
+        chosen = next((bucket for bucket in self.buckets if key(bucket, length)), None)
+        if chosen is not None:
+            return chosen
+        self.buckets.append(length)
+        self.buckets.sort()
+        return length
 
     def _model_call(self, inps: torch.Tensor) -> torch.Tensor:
         bs, seq_length = inps.shape
@@ -144,8 +192,60 @@ def _model_call(self, inps: torch.Tensor) -> torch.Tensor:
         if self.options.static_shapes and padding_length > 0:
             logits = logits[:, :-padding_length, :]
         logits = logits.to(torch.float32)
+
         return logits
 
+    def generate_until(self, requests: List[Instance], disable_tqdm: bool = False) -> List[str]:
+        """
+        Run the parent generate_until with the parent's max_length, then restore ours (even if it raises).
+        """
+        legacy_max_length = self.max_length
+        self.max_length = super().max_length
+        try:
+            return super().generate_until(requests, disable_tqdm)
+        finally:
+            self.max_length = legacy_max_length
+
+    def _model_generate(self, context, max_length, stop, **generation_kwargs):
+        """
+        Patched HFLM._model_generate: generates on HPU with a `max_new_tokens` budget of max_length - context length.
+        source: https://github.com/EleutherAI/lm-evaluation-harness/blob/v0.4.7/lm_eval/models/huggingface.py/#L858
+        `generation_kwargs` must contain `attention_mask` (moved to HPU below); returns `self.model.generate(...)`.
+        """
+        # temperature = 0.0 if not set
+        # if do_sample is false and temp==0.0:
+        # remove temperature, as do_sample=False takes care of this
+        # and we don't want a warning from HF
+        generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
+        do_sample = generation_kwargs.get("do_sample", None)
+
+        # The temperature has to be a strictly positive float -- if it is 0.0, use greedy decoding strategies
+        if generation_kwargs.get("temperature") == 0.0 and do_sample is None:
+            generation_kwargs["do_sample"] = do_sample = False
+
+        if do_sample is False and generation_kwargs.get("temperature") == 0.0:
+            generation_kwargs.pop("temperature")
+        # build stopping criteria
+        stopping_criteria = stop_sequences_criteria(self.tokenizer, stop, context.shape[1], context.shape[0])
+        # New-token budget; computed unconditionally so it is defined even when static_shapes is off.
+        max_gen_toks = max_length - context.shape[1]
+        if self.options.static_shapes:  # to avoid graph recompilation
+            self.options.bucket_internal = True
+            _ = self.find_bucket(context.shape[1])
+        # move context & attention_mask to hpu
+        context = context.to("hpu")
+        generation_kwargs["attention_mask"] = generation_kwargs["attention_mask"].to("hpu")
+        return self.model.generate(
+            input_ids=context,
+            max_new_tokens=max_gen_toks,
+            stopping_criteria=stopping_criteria,
+            pad_token_id=self.tokenizer.pad_token_id,
+            use_cache=True,
+            hpu_graphs=self.hpu_graphs,
+            lazy_mode=self.use_lazy_mode,
+            **generation_kwargs,
+        )
+
     def get_model_info(self) -> dict:
         """
         Patched method to get Hugging Face model information for experiment reproducibility.
diff --git a/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization.json b/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization.json
new file mode 100644
index 0000000000..62d9150d63
--- /dev/null
+++ b/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization.json
@@ -0,0 +1,13 @@
+{
+    "mode": "QUANTIZE",
+    "scale_method": "ACT_MAXABS_PCS_POW2_WEIGHT_MAXABS_PTS_POW2_HW",
+    "scale_format": "CONST",
+    "allowlist": {
+        "types": [],
+        "names": [
+            "mlp"
+        ]
+    },
+    "dynamic_quantization": "True",
+    "dump_stats_path": "./hqt_output/measure"
+}
diff --git a/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization_pts.json b/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization_pts.json
new file mode 100644
index 0000000000..69f41da153
--- /dev/null
+++ b/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization_pts.json
@@ -0,0 +1,13 @@
+{
+    "mode": "QUANTIZE",
+    "scale_method": "maxabs_pow2",
+    "scale_format": "CONST",
+    "allowlist": {
+        "types": [],
+        "names": [
+            "mlp"
+        ]
+    },
+    "dynamic_quantization": "True",
+    "dump_stats_path": "./hqt_output/measure"
+}
\ No newline at end of file
diff --git a/examples/text-generation/quantization_config/maxabs_quant_qdq.json b/examples/text-generation/quantization_config/maxabs_quant_qdq.json
new file mode 100644
index 0000000000..7b87c0d8d8
--- /dev/null
+++ b/examples/text-generation/quantization_config/maxabs_quant_qdq.json
@@ -0,0 +1,9 @@
+{
+    "method": "HOOKS",
+    "mode": "QUANTIZE",
+    "observer": "maxabs",
+    "scale_method": "maxabs_hw",
+    "scale_format": "SCALAR",
+    "dump_stats_path": "./hqt_output/measure",
+    "use_qdq": "True"
+}
\ No newline at end of file
diff --git a/examples/text-generation/requirements.txt b/examples/text-generation/requirements.txt
index 44aebd041a..3d800cc73b 100644
--- a/examples/text-generation/requirements.txt
+++ b/examples/text-generation/requirements.txt
@@ -1,5 +1,5 @@
-datasets
-peft
+datasets == 3.6.0
+peft == 0.11.1
 sentencepiece
 tiktoken
-blobfile
\ No newline at end of file
+blobfile
diff --git a/examples/text-generation/requirements_evaluation.txt b/examples/text-generation/requirements_evaluation.txt
new file mode 100644
index 0000000000..596d3f8463
--- /dev/null
+++ b/examples/text-generation/requirements_evaluation.txt
@@ -0,0 +1,5 @@
+evaluate == 0.4.3
+rouge_score == 0.1.2
+pandas <= 2.2.2
+sentencepiece
+nltk==3.8.1
diff --git a/examples/text-generation/requirements_lm_eval.txt b/examples/text-generation/requirements_lm_eval.txt
index 3f1a08bcc4..de1bb95acd 100644
--- a/examples/text-generation/requirements_lm_eval.txt
+++ b/examples/text-generation/requirements_lm_eval.txt
@@ -1,5 +1,11 @@
-lm-eval==0.4.7
-datasets==3.6.0
+lm-eval==0.4.9.1
+datasets == 3.6.0
+evaluate == 0.4.3
+rouge_score == 0.1.2
+accelerate
+pandas <= 2.2.2
+sentencepiece <= 0.2.0
+langdetect <= 1.0.9
+immutabledict <= 4.2.1
 tiktoken
 blobfile
-sentencepiece
\ No newline at end of file
diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py
old mode 100755
new mode 100644
index a0da20b1f5..85a9597879
--- a/examples/text-generation/run_generation.py
+++ b/examples/text-generation/run_generation.py
@@ -278,17 +278,26 @@ def setup_parser(parser):
     )
     parser.add_argument(
         "--use_flash_attention",
-        action="store_true",
+        nargs="?",
+        const=True,
+        default=False,
+        action=SetTrueOrFalseOrNone,
         help="Whether to enable Habana Flash Attention, provided that the model supports it.",
     )
     parser.add_argument(
         "--flash_attention_recompute",
-        action="store_true",
+        nargs="?",
+        const=True,
+        default=False,
+        action=SetTrueOrFalseOrNone,
         help="Whether to enable Habana Flash Attention in recompute mode on first token generation. This gives an opportunity of splitting graph internally which helps reduce memory consumption.",
     )
     parser.add_argument(
         "--flash_attention_causal_mask",
-        action="store_true",
+        nargs="?",
+        const=True,
+        default=False,
+        action=SetTrueOrFalseOrNone,
         help="Whether to enable Habana Flash Attention in causal mode on first token generation.",
     )
     parser.add_argument(
@@ -393,6 +402,11 @@ def setup_parser(parser):
         action="store_true",
         help="Load an AutoAWQ quantized checkpoint using AutoAWQ.",
     )
+    quant_parser_group.add_argument(
+        "--quantize_with_bnb",
+        action="store_true",
+        help="Quantize model to NF4 using BnB and then use NF4 weights for text-generation",
+    )
     quant_parser_group.add_argument(
         "--disk_offload",
         action="store_true",
@@ -516,7 +530,7 @@ def main():
         per_sequence_profiler = disabled_profiler
         per_token_profiler = active_profiler
 
-    if args.dataset_name == "mlcommons":
+    if args.dataset_name == "openorca" or args.dataset_name == "mlcommons":
         # Benchmark over the prompts below
         def get_ds(args):
             ds = pd.read_pickle(args.mlcommons_dataset)
@@ -546,6 +560,7 @@ def get_input(ds, batch_size):
 
         def generate(input_tokens, size=None, reduce_recompile=False, disable_profiling=False):
             """Generates sequences from the input sentences and returns them."""
+            profiler = disabled_profiler if disable_profiling else per_token_profiler
 
             timer = HabanaGenerationTime()
             timer.start()
@@ -565,6 +580,7 @@ def generate(input_tokens, size=None, reduce_recompile=False, disable_profiling=
                 lazy_mode=use_lazy_mode,
                 hpu_graphs=args.use_hpu_graphs,
                 ignore_eos=args.ignore_eos,
+                profiler=profiler,
             ).cpu()
             outputs = outputs.tolist()
             for i in range(len(outputs)):
@@ -612,6 +628,7 @@ def rounder(x):
         # Benchmark over n_iterations iterations
         N = len(input_sentences)
 
+        per_sequence_profiler.start()
         if dyn_prompt_lens is None:
             for i in range(args.n_iterations):
                 results = []
@@ -621,6 +638,7 @@ def rounder(x):
                     results.extend(generated)
                     print(f"Generating batch {b}/{N}")
                     b += 1
+                per_sequence_profiler.step()
         else:
             repeated_prompt_len = cycle(dyn_prompt_lens)
             for i in range(args.n_iterations):
@@ -630,8 +648,10 @@ def rounder(x):
                 for sentence in input_sentences:
                     generated = generate(sentence, prompt_len, args.reduce_recompile)
                     results.extend(generated)
+                per_sequence_profiler.step()
         timer.step()
         duration = timer.last_duration
+        per_sequence_profiler.stop()
         total_new_tokens_generated = args.n_iterations * args.batch_size * args.max_new_tokens
         throughput = total_new_tokens_generated / duration
 
@@ -859,24 +879,24 @@ def rounder(x):
         if dyn_prompt_lens is None:
             for i in range(args.n_iterations):
                 generated, first_token_time, rest_token_time, e2e_latency = generate(None, args.reduce_recompile)
+                per_sequence_profiler.step()
                 first_token_latencies.append(first_token_time)
                 rest_token_latencies.append(rest_token_time)
                 e2e_latencies.append(e2e_latency)
-                per_sequence_profiler.step()
         else:
             repeated_prompt_len = cycle(dyn_prompt_lens)
             for i in range(args.n_iterations):
                 prompt_len = next(repeated_prompt_len)
                 print("Generating for shape,", prompt_len)
                 generated, first_token_time, rest_token_time, e2e_latency = generate(prompt_len, args.reduce_recompile)
+                per_sequence_profiler.step()
                 first_token_latencies.append(first_token_time)
                 rest_token_latencies.append(rest_token_time)
                 e2e_latencies.append(e2e_latency)
-                per_sequence_profiler.step()
         timer.step()
+        per_sequence_profiler.stop()
         logger.info("Finished running generate")
         duration = timer.last_duration
-        per_sequence_profiler.stop()
         total_new_tokens_generated = args.n_iterations * args.batch_size * args.max_new_tokens
         throughput = total_new_tokens_generated / duration
         # Calculate average latencies
@@ -1033,7 +1053,7 @@ def generate_dataset(batch, disable_profiling=False):
         timer.start()
         for i, batch in enumerate(dataloader):
             timer.step()
-            generate_dataset(batch)
+            generate_dataset(batch, disable_profiling=True)
             timer.step()
             duration = timer.last_duration
             # The first three iterations take longer because of graph compilation
@@ -1042,15 +1062,14 @@ def generate_dataset(batch, disable_profiling=False):
         torch_hpu.synchronize()
         timer.step()
         compilation_duration = timer.last_duration
+
         total_new_tokens_generated = 0
         duration = 0
         separator = "-" * 50
         logger.info("Running generate dataset...")
-
         timer = HabanaGenerationTime()
         timer.start()
         per_sequence_profiler.start()
-
         for i, batch in enumerate(dataloader):
             timer.step()
             prompt, outputs = generate_dataset(batch)
@@ -1067,8 +1086,8 @@ def generate_dataset(batch, disable_profiling=False):
             if args.run_partial_dataset and args.n_iterations == i + 1:
                 break
             per_sequence_profiler.step()
-        timer.step()
         per_sequence_profiler.stop()
+        timer.step()
 
         throughput = total_new_tokens_generated / duration
         # Print Stats
diff --git a/examples/text-generation/run_lm_eval.py b/examples/text-generation/run_lm_eval.py
index aeac93af00..20ea0a9cd5 100644
--- a/examples/text-generation/run_lm_eval.py
+++ b/examples/text-generation/run_lm_eval.py
@@ -22,6 +22,8 @@
 import logging
 import multiprocessing as mp
 import os
+from pathlib import Path
+from typing import Union
 
 import psutil
 
@@ -53,6 +55,20 @@ def LimitedSpawnPool(_):
 mp.Pool = LimitedSpawnPool
 
 
+def try_parse_json(value: str) -> Union[str, dict, None]:
+    """Parse a CLI value as JSON; non-JSON text without a "{" is returned unchanged, None passes through.
+    From https://github.com/EleutherAI/lm-evaluation-harness/blob/v0.4.9.1/lm_eval/__main__.py
+    """
+    if value is None:
+        return None
+    try:
+        return json.loads(value)
+    except json.JSONDecodeError:
+        if "{" not in value:
+            return value  # not JSON-looking (e.g. key=value pairs): hand the raw string back
+        raise argparse.ArgumentTypeError(f"Invalid JSON: {value}. Hint: Use double quotes for JSON strings.")
+
+
 def setup_lm_eval_parser():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Evaluation script for HPU"
@@ -62,7 +78,7 @@ def setup_lm_eval_parser():
         type=int,
         nargs="+",
         help="Input length buckets to use with static_shapes",
-        default=[16, 32, 64, 128, 189, 284, 384, 985],
+        default=[16, 32, 64, 128, 189, 284, 384],
     )
 
     parser.add_argument(
@@ -75,7 +91,14 @@ def setup_lm_eval_parser():
         help="Tasks to run",
         default=["hellaswag", "lambada_openai", "piqa", "winogrande"],
     )
-    parser.add_argument("--limit_iters", type=int, help="limit examples to run that many iterations", default=None)
+    parser.add_argument(
+        "--limit",
+        "-L",
+        type=float,
+        default=None,
+        metavar="N|0<N<1",
+        help="Limit the number of examples per task. If <1, limit is a percentage of the total number of examples.",
+    )
     parser.add_argument(
         "--show_config",
         action="store_true",
@@ -88,8 +111,68 @@ def setup_lm_eval_parser():
         help="If True, prints extra-logs for all tasks",
     )
     parser.add_argument("--max_graphs", type=int, help="Maximum number of HPU graphs", default=None)
+    parser.add_argument(
+        "--gen_kwargs",
+        type=try_parse_json,
+        default=None,
+        help=(
+            "Either comma delimited string or JSON formatted arguments for model generation on greedy_until tasks,"
+            """ e.g. '{"temperature":0.7,"until":["hello"]}' or temperature=0,top_p=0.1."""
+        ),
+    )
+    parser.add_argument(
+        "--num_fewshot",
+        "-f",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Number of examples in few-shot context",
+    )
+    parser.add_argument(
+        "--fewshot_as_multiturn",
+        action="store_true",
+        default=False,
+        help="If True, uses the fewshot as a multi-turn conversation",
+    )
+    parser.add_argument(
+        "--metadata",
+        type=json.loads,
+        default=None,
+        help="""JSON string metadata to pass to task configs, for example '{"max_seq_lengths":[4096,8192]}'. Will be merged with model_args. Can also be set in task config.""",
+    )
+    parser.add_argument(
+        "--system_instruction",
+        type=str,
+        default=None,
+        help="System instruction to be used in the prompt",
+    )
+    parser.add_argument(
+        "--apply_chat_template",
+        type=str,
+        nargs="?",
+        const=True,
+        default=False,
+        help=(
+            "If True, apply chat template to the prompt. "
+            "Providing `--apply_chat_template` without an argument will apply the default chat template to the prompt. "
+            "To apply a specific template from the available list of templates, provide the template name as an argument. "
+            "E.g. `--apply_chat_template template_name`"
+        ),
+    )
+    parser.add_argument(
+        "--samples",
+        "-E",
+        default=None,
+        type=str,
+        metavar="/path/to/json",
+        help='JSON string or path to JSON file containing doc indices of selected examples to test. Format: {"task_name":[indices],...}',
+    )
+    parser.add_argument(
+        "--confirm_run_unsafe_code",
+        action="store_true",
+        help="Confirm that you understand the risks of running unsafe code for tasks that require it",
+    )
     args = setup_parser(parser)
-
     return args
 
 
@@ -98,19 +181,51 @@ def main() -> None:
     args = setup_lm_eval_parser()
     model, _, tokenizer, generation_config = initialize_model(args, logger)
 
+    # Delayed imports: external modules are imported here to ensure that
+    # environment variables and runtime configurations are properly initialized
+    # before loading modules that depend on them.
     import torch
     from lm_eval import evaluator, utils
     from model_adapter import HabanaModelAdapter
 
-    with torch.no_grad():
-        lm = HabanaModelAdapter(tokenizer, model, args, generation_config)
-
     from optimum.habana.utils import HabanaGenerationTime, get_hpu_memory_stats
 
+    max_length = None
+    metadata = None
+    if args.metadata:
+        metadata = args.metadata if isinstance(args.metadata, dict) else utils.sample_parse_args_string(args.metadata)
+        max_length = args.metadata.get("max_length")
+
+    if args.fewshot_as_multiturn and args.apply_chat_template is False:
+        raise ValueError(
+            "When `fewshot_as_multiturn` is selected, `apply_chat_template` must be set (either to `True` or to the chosen template name)."
+        )
+    if args.samples:
+        assert args.limit is None, "If --samples is not None, then --limit must be None."
+        if (samples := Path(args.samples)).is_file():
+            args.samples = json.loads(samples.read_text())
+        else:
+            args.samples = json.loads(args.samples)
+
+    with torch.no_grad():
+        lm = HabanaModelAdapter(tokenizer, model, args, generation_config, max_length=max_length)
+
     with HabanaGenerationTime() as timer:
         with torch.no_grad():
-            log_samples = args.log_samples
-            results = evaluator.simple_evaluate(lm, tasks=args.tasks, limit=args.limit_iters, log_samples=log_samples)
+            results = evaluator.simple_evaluate(
+                lm,
+                tasks=args.tasks,
+                limit=args.limit,
+                samples=args.samples,
+                log_samples=args.log_samples,
+                num_fewshot=args.num_fewshot,
+                fewshot_as_multiturn=args.fewshot_as_multiturn,
+                gen_kwargs=args.gen_kwargs,
+                system_instruction=args.system_instruction,
+                apply_chat_template=args.apply_chat_template,
+                metadata=metadata,
+                confirm_run_unsafe_code=args.confirm_run_unsafe_code,
+            )
         if args.device == "hpu":
             import habana_frameworks.torch.hpu as torch_hpu
 
diff --git a/examples/text-generation/utils.py b/examples/text-generation/utils.py
index 2a6454be75..a82a475484 100644
--- a/examples/text-generation/utils.py
+++ b/examples/text-generation/utils.py
@@ -138,7 +138,7 @@ def setup_env(args):
 
     from optimum.habana.utils import check_optimum_habana_min_version
 
-    check_optimum_habana_min_version("1.18.0.dev0")
+    check_optimum_habana_min_version("1.19.0.dev0")
 
     # Tweak generation so that it runs faster on Gaudi
     from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
@@ -290,6 +290,22 @@ def setup_model(args, model_dtype, model_kwargs, logger):
         model = AutoModelForCausalLM.from_pretrained(
             args.model_name_or_path, torch_dtype=model_dtype, quantization_config=quantization_config, **model_kwargs
         )
+    elif args.quantize_with_bnb:
+        from transformers import BitsAndBytesConfig
+
+        nf4_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.bfloat16,
+        )
+        model = AutoModelForCausalLM.from_pretrained(
+            args.model_name_or_path,
+            quantization_config=nf4_config,
+            device_map={"": "hpu"},
+            torch_dtype=model_dtype,
+            **model_kwargs,
+        )
     elif args.load_quantized_model_with_inc:
         # TODO: This will be removed in v1.20 Synapse release
         # Override neural_compressor split_rank_state_dict for loading neural_magic models on multi-cards.
diff --git a/examples/text-to-speech/requirements.txt b/examples/text-to-speech/requirements.txt
index 01d3da67aa..92217bf900 100644
--- a/examples/text-to-speech/requirements.txt
+++ b/examples/text-to-speech/requirements.txt
@@ -1,3 +1,3 @@
-datasets
-soundfile
+datasets == 3.6.0
+soundfile == 0.12.1
 sentencepiece
diff --git a/examples/translation/requirements.txt b/examples/translation/requirements.txt
index ff9ede1567..458e933076 100644
--- a/examples/translation/requirements.txt
+++ b/examples/translation/requirements.txt
@@ -1,7 +1,7 @@
-datasets >= 2.4.0
+datasets == 3.6.0
 sentencepiece != 0.1.92
-protobuf
-sacrebleu >= 1.4.12
-py7zr
+protobuf == 3.20.3
+sacrebleu >= 1.4.12, <= 2.4.2
+py7zr == 0.21.0
 torch >= 1.3
-evaluate
+evaluate == 0.4.3
diff --git a/examples/translation/run_translation.py b/examples/translation/run_translation.py
index c1d8a07d1d..a1892c3371 100644
--- a/examples/translation/run_translation.py
+++ b/examples/translation/run_translation.py
@@ -63,7 +63,7 @@ def check_optimum_habana_min_version(*a, **b):
 
 # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
 
diff --git a/examples/trl/requirements.txt b/examples/trl/requirements.txt
index 502a2d99a8..17e8b74935 100644
--- a/examples/trl/requirements.txt
+++ b/examples/trl/requirements.txt
@@ -1,6 +1,7 @@
 trl == 0.9.6
 peft == 0.15.0
 datasets == 2.19.2
-tyro
-evaluate
+wandb == 0.17.1
+tyro == 0.8.4
+evaluate == 0.4.3
 scikit-learn == 1.5.2
diff --git a/examples/video-comprehension/run_example.py b/examples/video-comprehension/run_example.py
index b53679fb0b..5868bea3e8 100644
--- a/examples/video-comprehension/run_example.py
+++ b/examples/video-comprehension/run_example.py
@@ -24,10 +24,10 @@
 import numpy as np
 import torch
 from huggingface_hub import hf_hub_download
+from transformers import VideoLlavaProcessor
 
 from optimum.habana.transformers.modeling_utils import (
     GaudiVideoLlavaForConditionalGeneration,
-    GaudiVideoLlavaProcessor,
     adapt_transformers_to_gaudi,
 )
 
@@ -168,7 +168,7 @@ def main():
 
         model = wrap_in_hpu_graph(model)
 
-    processor = GaudiVideoLlavaProcessor.from_pretrained(args.model_name_or_path)
+    processor = VideoLlavaProcessor.from_pretrained(args.model_name_or_path)
     processor.tokenizer.padding_side = "left"
     inputs = processor(text=prompts, videos=video_clips, return_tensors="pt")
     inputs = inputs.to(device)
diff --git a/optimum/habana/quantizers/bitsandbytes.py b/optimum/habana/quantizers/bitsandbytes.py
deleted file mode 100644
index ee56b55d53..0000000000
--- a/optimum/habana/quantizers/bitsandbytes.py
+++ /dev/null
@@ -1,265 +0,0 @@
-from functools import lru_cache
-from typing import Any, Dict, List, Optional
-
-from transformers.modeling_utils import PreTrainedModel
-from transformers.pytorch_utils import Conv1D
-from transformers.quantizers.quantizers_utils import get_module_from_name
-from transformers.utils import (
-    ACCELERATE_MIN_VERSION,
-    get_available_devices,
-    is_accelerate_available,
-    is_bitsandbytes_multi_backend_available,
-    is_ipex_available,
-    is_torch_available,
-    logging,
-)
-from transformers.utils.import_utils import _is_package_available
-
-
-if is_torch_available():
-    import torch
-
-_bitsandbytes_available = _is_package_available("bitsandbytes")
-logger = logging.get_logger(__name__)
-
-
-def gaudi_bitsandbytesconfig_post_init(self):
-    r"""
-    Safety checker that arguments are correct - also replaces some NoneType arguments with their default values.
-    Copied from https://github.com/huggingface/transformers/blob/53fad641cfdb5105e2470bcf3ef17ea8e25cc300/src/transformers/utils/quantization_config.py#L430
-    Only difference is removed check on bitsandbytes version
-    """
-    if not isinstance(self.load_in_4bit, bool):
-        raise TypeError("load_in_4bit must be a boolean")
-
-    if not isinstance(self.load_in_8bit, bool):
-        raise TypeError("load_in_8bit must be a boolean")
-
-    if not isinstance(self.llm_int8_threshold, float):
-        raise TypeError("llm_int8_threshold must be a float")
-
-    if self.llm_int8_skip_modules is not None and not isinstance(self.llm_int8_skip_modules, list):
-        raise TypeError("llm_int8_skip_modules must be a list of strings")
-    if not isinstance(self.llm_int8_enable_fp32_cpu_offload, bool):
-        raise TypeError("llm_int8_enable_fp32_cpu_offload must be a boolean")
-
-    if not isinstance(self.llm_int8_has_fp16_weight, bool):
-        raise TypeError("llm_int8_has_fp16_weight must be a boolean")
-
-    if self.bnb_4bit_compute_dtype is not None and not isinstance(self.bnb_4bit_compute_dtype, torch.dtype):
-        raise TypeError("bnb_4bit_compute_dtype must be torch.dtype")
-
-    if not isinstance(self.bnb_4bit_quant_type, str):
-        raise TypeError("bnb_4bit_quant_type must be a string")
-
-    if not isinstance(self.bnb_4bit_use_double_quant, bool):
-        raise TypeError("bnb_4bit_use_double_quant must be a boolean")
-
-
-@lru_cache()
-def gaudi_is_bitsandbytes_available():
-    """
-    Copied from https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/utils/import_utils.py#L871
-    Only difference is that CUDA related checks are removed.
-    """
-    if not is_torch_available() or not _bitsandbytes_available:
-        return False
-
-    # Newer versions of `bitsandbytes` can be imported on systems without CUDA.
-    return True
-
-
-def gaudi_validate_bnb_backend_availability(raise_exception=False):
-    """
-    Validates if the available devices are supported by bitsandbytes, optionally raising an exception if not.
-    Copied from https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/integrations/bitsandbytes.py#L545
-    Only difference is that CUDA related functions calls are deleted.
-    """
-    if is_bitsandbytes_multi_backend_available():
-        return _gaudi_validate_bnb_multi_backend_availability(raise_exception)
-
-
-def _gaudi_validate_bnb_multi_backend_availability(raise_exception):
-    """
-    Copied https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/integrations/bitsandbytes.py#L484
-    Only difference is addition of check for HPU
-    """
-    import bitsandbytes as bnb
-
-    bnb_supported_devices = getattr(bnb, "supported_torch_devices", set())
-    available_devices = get_available_devices()
-
-    if "hpu" in bnb_supported_devices:
-        logger.debug("Multi-backend validation successful.")
-        return True
-
-    if available_devices == {"cpu"} and not is_ipex_available():
-        from importlib.util import find_spec
-
-        if find_spec("intel_extension_for_pytorch"):
-            logger.warning(
-                "You have Intel IPEX installed but if you're intending to use it for CPU, it might not have the right version. Be sure to double check that your PyTorch and IPEX installs are compatible."
-            )
-
-        available_devices.discard("cpu")  # Only Intel CPU is supported by BNB at the moment
-
-    if not available_devices.intersection(bnb_supported_devices):
-        if raise_exception:
-            bnb_supported_devices_with_info = set(  # noqa: C401
-                '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)'
-                if device == "cpu"
-                else device
-                for device in bnb_supported_devices
-            )
-            err_msg = (
-                f"None of the available devices `available_devices = {available_devices or None}` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {bnb_supported_devices_with_info}`. "
-                "Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend"
-            )
-
-            logger.error(err_msg)
-            raise RuntimeError(err_msg)
-
-        logger.warning("No supported devices found for bitsandbytes multi-backend.")
-        return False
-
-    logger.debug("Multi-backend validation successful.")
-    return True
-
-
-def gaudi_validate_environment(self, *args, **kwargs):
-    """
-    Copied from https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/quantizers/quantizer_bnb_4bit.py#L68
-    Only difference is deletion of bitsandbytes version checks
-    """
-    if not is_accelerate_available():
-        raise ImportError(
-            f"Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`"
-        )
-    if not gaudi_is_bitsandbytes_available():
-        raise ImportError(
-            "Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`"
-        )
-
-    bnb_multibackend_is_enabled = is_bitsandbytes_multi_backend_available()
-    gaudi_validate_bnb_backend_availability(raise_exception=True)
-
-    if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
-        raise ValueError(
-            "Converting into 4-bit or 8-bit weights from tf/flax weights is currently not supported, please make"
-            " sure the weights are in PyTorch format."
-        )
-
-    device_map = kwargs.get("device_map", None)
-    if (
-        device_map is not None
-        and isinstance(device_map, dict)
-        and not self.quantization_config.llm_int8_enable_fp32_cpu_offload
-    ):
-        device_map_without_lm_head = {
-            key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert
-        }
-        if set(device_map.values()) == {"cpu"} and bnb_multibackend_is_enabled:
-            pass
-        elif "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
-            raise ValueError(
-                "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
-                "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
-                "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
-                "`from_pretrained`. Check "
-                "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
-                "for more details. "
-            )
-
-
-def gaudi_create_quantized_param(
-    self,
-    model: "PreTrainedModel",
-    param_value: "torch.Tensor",
-    param_name: str,
-    target_device: "torch.device",
-    state_dict: Dict[str, Any],
-    unexpected_keys: Optional[List[str]] = None,
-):
-    """
-    Copied from https://github.com/huggingface/transformers/blob/62c60a30181a65e1a3a7f19c3055a240a6a21335/src/transformers/quantizers/quantizer_bnb_4bit.py#L138
-    only diiference is addition of HPU device
-    """
-    import bitsandbytes as bnb
-
-    module, tensor_name = get_module_from_name(model, param_name)
-
-    if tensor_name not in module._parameters:
-        raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.")
-
-    old_value = getattr(module, tensor_name)
-
-    if tensor_name == "bias":
-        if param_value is None:
-            new_value = old_value.to(target_device)
-        else:
-            new_value = param_value.to(target_device)
-
-        new_value = torch.nn.Parameter(new_value, requires_grad=old_value.requires_grad)
-        module._parameters[tensor_name] = new_value
-        return
-
-    if not isinstance(module._parameters[tensor_name], bnb.nn.Params4bit):
-        raise ValueError("this function only loads `Linear4bit components`")
-    if (
-        old_value.device == torch.device("meta")
-        and target_device not in ["meta", torch.device("meta")]
-        and param_value is None
-    ):
-        raise ValueError(f"{tensor_name} is on the meta device, we need a `value` to put in on {target_device}.")
-
-    # construct `new_value` for the module._parameters[tensor_name]:
-    if self.pre_quantized:
-        # 4bit loading. Collecting components for restoring quantized weight
-        # This can be expanded to make a universal call for any quantized weight loading
-
-        if not self.is_serializable:
-            raise ValueError(
-                "Detected int4 weights but the version of bitsandbytes is not compatible with int4 serialization. "
-                "Make sure to download the latest `bitsandbytes` version. `pip install --upgrade bitsandbytes`."
-            )
-
-        if (param_name + ".quant_state.bitsandbytes__fp4" not in state_dict) and (
-            param_name + ".quant_state.bitsandbytes__nf4" not in state_dict
-        ):
-            raise ValueError(
-                f"Supplied state dict for {param_name} does not contain `bitsandbytes__*` and possibly other `quantized_stats` components."
-            )
-
-        quantized_stats = {}
-        for k, v in state_dict.items():
-            if param_name + "." in k:
-                quantized_stats[k] = v
-                if unexpected_keys is not None and k in unexpected_keys:
-                    unexpected_keys.remove(k)
-
-        param_kwargs = {}
-        if self.is_bnb_supports_quant_storage_module:
-            param_kwargs["module"] = module
-
-        new_value = bnb.nn.Params4bit.from_prequantized(
-            data=param_value,
-            quantized_stats=quantized_stats,
-            requires_grad=False,
-            device=target_device,
-            **param_kwargs,
-        )
-    else:
-        if target_device == "hpu":
-            new_value = param_value.to("hpu")
-        else:
-            new_value = param_value.to("cpu")
-
-        # Support models using `Conv1D` in place of `nn.Linear` (e.g. openai-community/gpt2) by transposing the weight matrix prior to quantization.
-        # Since weights are saved in the correct "orientation", we skip transposing when loading.
-        if issubclass(module.source_cls, Conv1D):
-            new_value = new_value.T
-
-        kwargs = old_value.__dict__
-        new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(target_device)
-
-    module._parameters[tensor_name] = new_value
diff --git a/optimum/habana/transformers/generation/utils.py b/optimum/habana/transformers/generation/utils.py
index a8b1858e99..e8984c05b4 100755
--- a/optimum/habana/transformers/generation/utils.py
+++ b/optimum/habana/transformers/generation/utils.py
@@ -134,6 +134,7 @@
     "qwen2_vl",
     "qwen3",
     "qwen3_moe",
+    "arctic",
 ]
 
 # Initial generated token index is set to 1 to accomodate SOS (start of string) token.
@@ -2473,6 +2474,7 @@ def _contrastive_search(
                     do_padding = (
                         key_to_check is not None
                         and outputs.past_key_values[0][0].shape[2] == model_inputs[key_to_check].shape[1]
+                        and generation_config.max_new_tokens > 1
                     )
 
                 if do_padding:
@@ -2837,6 +2839,7 @@ def _sample(
                     do_padding = (
                         key_to_check is not None
                         and outputs.past_key_values[0][0].shape[2] == model_inputs[key_to_check].shape[1]
+                        and generation_config.max_new_tokens > 1
                     )
 
                 if do_padding:
diff --git a/optimum/habana/transformers/modeling_utils.py b/optimum/habana/transformers/modeling_utils.py
index 66802f7135..7b86a35b81 100644
--- a/optimum/habana/transformers/modeling_utils.py
+++ b/optimum/habana/transformers/modeling_utils.py
@@ -18,13 +18,6 @@
 import transformers
 import transformers.utils.fx
 
-from ..quantizers.bitsandbytes import (
-    gaudi_bitsandbytesconfig_post_init,
-    gaudi_create_quantized_param,
-    gaudi_is_bitsandbytes_available,
-    gaudi_validate_bnb_backend_availability,
-    gaudi_validate_environment,
-)
 from .generation import (
     GaudiGenerationConfig,
     GaudiGenerationMixin,
@@ -43,6 +36,9 @@
 from .loss import gaudi_RTDetrHungarianMatcher_forward
 from .models import (
     GAUDI_WHISPER_ATTENTION_CLASSES,
+    ArcticConfig,
+    ArcticForCausalLM,
+    ArcticTokenizer,
     BaichuanConfig,
     BaichuanForCausalLM,
     BaichuanTokenizer,
@@ -192,7 +188,6 @@
     GaudiStarcoder2ForCausalLM,
     GaudiStarcoder2Model,
     GaudiVideoLlavaForConditionalGeneration,
-    GaudiVideoLlavaProcessor,
     GaudiVisionSdpaAttention,
     GaudiWav2Vec2SdpaAttention,
     GaudiWhisperDecoder,
@@ -323,14 +318,6 @@ def adapt_transformers_to_gaudi():
     for Gaudi.
     """
 
-    transformers.utils.quantization_config.BitsAndBytesConfig.post_init = gaudi_bitsandbytesconfig_post_init
-    transformers.utils.import_utils.is_bitsandbytes_available = gaudi_is_bitsandbytes_available
-    transformers.utils.is_bitsandbytes_available = gaudi_is_bitsandbytes_available
-    transformers.quantizers.quantizer_bnb_4bit.is_bitsandbytes_available = gaudi_is_bitsandbytes_available
-    transformers.integrations.bitsandbytes.validate_bnb_backend_availability = gaudi_validate_bnb_backend_availability
-    transformers.quantizers.quantizer_bnb_4bit.Bnb4BitHfQuantizer.validate_environment = gaudi_validate_environment
-    transformers.quantizers.quantizer_bnb_4bit.Bnb4BitHfQuantizer.create_quantized_param = gaudi_create_quantized_param
-
     # models that support symbolic tracing should be added to this list
     models_with_tracing_support = []
 
@@ -776,7 +763,6 @@ def adapt_transformers_to_gaudi():
     transformers.models.video_llava.modeling_video_llava.VideoLlavaForConditionalGeneration = (
         GaudiVideoLlavaForConditionalGeneration
     )
-    transformers.models.video_llava.processing_video_llava.VideoLlavaProcessor = GaudiVideoLlavaProcessor
 
     # Optimization for Whisper on Gaudi
     transformers.models.whisper.modeling_whisper.WhisperSdpaAttention = GaudiWhisperSdpaAttention
@@ -874,3 +860,7 @@ def adapt_transformers_to_gaudi():
 
     # Optimization for RT-DETR model on Gaudi
     transformers.loss.loss_rt_detr.RTDetrHungarianMatcher.forward = gaudi_RTDetrHungarianMatcher_forward
+
+    transformers.AutoConfig.register("arctic", ArcticConfig)
+    transformers.AutoModelForCausalLM.register(ArcticConfig, ArcticForCausalLM)
+    transformers.AutoTokenizer.register(ArcticConfig, ArcticTokenizer)
diff --git a/optimum/habana/transformers/models/__init__.py b/optimum/habana/transformers/models/__init__.py
index 81ebae6e51..9b28749b42 100644
--- a/optimum/habana/transformers/models/__init__.py
+++ b/optimum/habana/transformers/models/__init__.py
@@ -313,6 +313,7 @@
     GaudiSiglipVisionModel,
     GaudiSiglipVisionTransformer,
 )
+from .snowflake import ArcticConfig, ArcticForCausalLM, ArcticTokenizer
 from .speecht5 import (
     gaudi_generate_speech,
     gaudi_SpeechT5Attention_forward,
@@ -341,7 +342,7 @@
     gaudi_T5Stack_forward,
 )
 from .table_transformer import gaudi_table_transformer_conv_encoder_forward
-from .video_llava import GaudiVideoLlavaForConditionalGeneration, GaudiVideoLlavaProcessor
+from .video_llava import GaudiVideoLlavaForConditionalGeneration
 from .vision_encoder_decoder import (
     gaudi_VisionEncoderDecoderModel_prepare_inputs_for_generation,
 )
diff --git a/optimum/habana/transformers/models/llama/modeling_llama.py b/optimum/habana/transformers/models/llama/modeling_llama.py
index 75d8097cc8..4b4b903bfd 100755
--- a/optimum/habana/transformers/models/llama/modeling_llama.py
+++ b/optimum/habana/transformers/models/llama/modeling_llama.py
@@ -522,7 +522,7 @@ def get_k_proj_weight_dtype(self):
         if hasattr(self.k_proj, "qweight"):
             return self.k_proj.scales.dtype
         elif hasattr(self.k_proj, "use_qdq") and self.k_proj.use_qdq:
-            return self.k_proj.dequant_weights.hp_dtype
+            return self.k_proj.weight.dtype
         elif isinstance(self.k_cache, KVCache) and "float8" in str(self.k_proj.weight.dtype):
             return self.k_proj.hp_dtype
         return self.k_proj.weight.dtype
@@ -1000,7 +1000,6 @@ def forward(
                     valid_sequence_lengths=sub_valid_sequence_lengths[i],
                     cache_idx=cache_idx,
                     num_virtual_tokens=num_virtual_tokens,
-                    **kwargs,
                 )
                 self.self_attn.attention_all_reduce(split_hidden_states[i])
                 if output_attentions:
@@ -1044,7 +1043,6 @@ def forward(
                 valid_sequence_lengths=valid_sequence_lengths,
                 cache_idx=cache_idx,
                 num_virtual_tokens=num_virtual_tokens,
-                **kwargs,
             )
             self.self_attn.attention_all_reduce(hidden_states)
             hidden_states, residual = self.post_attn_pre_mlp(hidden_states, residual)
diff --git a/optimum/habana/transformers/models/snowflake/__init__.py b/optimum/habana/transformers/models/snowflake/__init__.py
new file mode 100644
index 0000000000..a907bf0e56
--- /dev/null
+++ b/optimum/habana/transformers/models/snowflake/__init__.py
@@ -0,0 +1,3 @@
+from .configuration_arctic import ArcticConfig
+from .modeling_arctic import ArcticForCausalLM
+from .tokenization_arctic import ArcticTokenizer
diff --git a/optimum/habana/transformers/models/snowflake/configuration_arctic.py b/optimum/habana/transformers/models/snowflake/configuration_arctic.py
new file mode 100644
index 0000000000..bf81f4942c
--- /dev/null
+++ b/optimum/habana/transformers/models/snowflake/configuration_arctic.py
@@ -0,0 +1,216 @@
+# Copyright 2023 Snowflake AI and the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Arctic model configuration. Copied from https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/be318cae5aba5291208f27d30991a5150500887d."""
+
+from dataclasses import asdict, dataclass
+from typing import Any, Dict
+
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+ARCTIC_PRETRAINED_CONFIG_ARCHIVE_MAP = {
+    "arctic": "https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/main/config.json",
+}
+
+
+@dataclass
+class ArcticLoraConfig:
+    lora_r: int = 64
+    lora_alpha: float = 16
+    shard_base_weights: bool = False
+
+
+@dataclass
+class ArcticQuantizationConfig:
+    q_bits: int = 8
+    rounding: str = "nearest"
+    mantissa_bits: int = 3
+    group_size: int = 512
+
+
+class ArcticConfig(PretrainedConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`ArcticModel`]. It is used to instantiate an
+    Arctic model according to the specified arguments, defining the model architecture. Instantiating a configuration
+    with the defaults will yield a similar configuration to that of a reference model that is yet to be documented (TODO(rsamdani): add what model has the default config).
+
+
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+
+
+    Args:
+        vocab_size (`int`, *optional*, defaults to 32000):
+            Vocabulary size of the Arctic model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`ArcticModel`]
+        hidden_size (`int`, *optional*, defaults to 4096):
+            Dimension of the hidden representations.
+        intermediate_size (`int`, *optional*, defaults to 14336):
+            Dimension of the MLP representations.
+        num_hidden_layers (`int`, *optional*, defaults to 32):
+            Number of hidden layers in the Transformer encoder.
+        num_attention_heads (`int`, *optional*, defaults to 32):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        num_key_value_heads (`int`, *optional*):
+            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
+            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
+            `num_key_value_heads=1`, the model will use Multi Query Attention (MQA), otherwise GQA is used. When
+            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
+            by meanpooling all the original heads within that group. For more details check out [this
+            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `num_attention_heads`.
+        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
+            The non-linear activation function (function or string) in the decoder.
+        max_position_embeddings (`int`, *optional*, defaults to 4096):
+            The maximum sequence length that this model might ever be used with. Arctic's sliding window attention
+            allows sequence of up to 4096*32 tokens.
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
+            The epsilon used by the rms normalization layers.
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models). Only
+            relevant if `config.is_decoder=True`.
+        pad_token_id (`int`, *optional*):
+            The id of the padding token.
+        bos_token_id (`int`, *optional*, defaults to 1):
+            The id of the "beginning-of-sequence" token.
+        eos_token_id (`int`, *optional*, defaults to 2):
+            The id of the "end-of-sequence" token.
+        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+            Whether the model's input and output word embeddings should be tied.
+        rope_theta (`float`, *optional*, defaults to 1000000.0):
+            The base period of the RoPE embeddings.
+        sliding_window (`int`, *optional*):
+            Sliding window attention window size. If not specified, defaults to `None`.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+        num_experts_per_tok (`int`, *optional*, defaults to 1):
+            The number of experts to route per-token, can be also interpreted as the `top-k` routing
+            parameter.
+        num_local_experts (`int`, *optional*, defaults to 8):
+            Number of experts per Sparse MLP layer.
+        router_aux_loss_coef (`float`, *optional*, defaults to 0.001):
+            The aux loss factor for the total loss.
+
+    ```python
+    >>> from transformers import ArcticModel, ArcticConfig
+
+    >>> # Initializing an Arctic 7B style configuration (TODO(rsamdani): verify which model the default configuration corresponds to).
+    >>> configuration = ArcticConfig()
+
+    >>> # Initializing a model from the Arctic 7B style configuration
+    >>> model = ArcticModel(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
+
+    model_type = "arctic"
+    keys_to_ignore_at_inference = ["past_key_values"]
+
+    def __init__(
+        self,
+        vocab_size=32000,
+        hidden_size=4096,
+        intermediate_size=14336,
+        num_hidden_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=None,
+        hidden_act="silu",
+        max_position_embeddings=4096,
+        initializer_range=0.02,
+        rms_norm_eps=1e-5,
+        use_cache=True,
+        pad_token_id=None,
+        bos_token_id=1,
+        eos_token_id=2,
+        tie_word_embeddings=False,
+        rope_theta=1e6,
+        sliding_window=None,
+        attention_dropout=0.0,
+        num_experts_per_tok=1,
+        num_local_experts=8,
+        router_aux_loss_coef=0.001,
+        moe_layer_frequency=2,
+        parallel_attn_mlp_res=False,
+        moe_train_capacity_factor=1,
+        moe_eval_capacity_factor=1,
+        enable_expert_tensor_parallelism=False,
+        moe_min_capacity=0,
+        moe_token_dropping=True,
+        quantization=None,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.sliding_window = sliding_window
+
+        # for backward compatibility
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+
+        self.num_key_value_heads = num_key_value_heads
+        self.hidden_act = hidden_act
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.attention_dropout = attention_dropout
+
+        self.num_experts_per_tok = num_experts_per_tok
+        self.num_local_experts = num_local_experts
+        self.router_aux_loss_coef = router_aux_loss_coef
+        self.moe_layer_frequency = moe_layer_frequency
+        self.moe_train_capacity_factor = moe_train_capacity_factor
+        self.moe_eval_capacity_factor = moe_eval_capacity_factor
+        self.enable_expert_tensor_parallelism = enable_expert_tensor_parallelism
+        self.moe_min_capacity = moe_min_capacity
+        self.moe_token_dropping = moe_token_dropping
+        self.parallel_attn_mlp_res = parallel_attn_mlp_res
+        if isinstance(quantization, dict):
+            self.quantization = ArcticQuantizationConfig(**quantization)
+        else:
+            self.quantization = quantization
+
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+    @classmethod
+    def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "ArcticConfig":
+        result = super().from_dict(config_dict, **kwargs)
+        if isinstance(result, tuple):
+            config = result[0]
+        else:
+            config = result
+        if isinstance(config.quantization, dict):
+            config.quantization = ArcticQuantizationConfig(**config.quantization)
+        return result
+
+    def to_dict(self) -> Dict[str, Any]:
+        ret = super().to_dict()
+        if isinstance(ret["quantization"], ArcticQuantizationConfig):
+            ret["quantization"] = asdict(ret["quantization"])
+        return ret
diff --git a/optimum/habana/transformers/models/snowflake/modeling_arctic.py b/optimum/habana/transformers/models/snowflake/modeling_arctic.py
new file mode 100644
index 0000000000..40e5d9f3d3
--- /dev/null
+++ b/optimum/habana/transformers/models/snowflake/modeling_arctic.py
@@ -0,0 +1,1511 @@
+# coding=utf-8
+# Copyright 2023 Mistral AI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch Arctic model. Adapted from https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/be318cae5aba5291208f27d30991a5150500887d.
+
+Changes made:
+- Use HPU FusedRoPE implementation
+- Use HPU FusedRMSNorm implementation
+- Added mark steps
+"""
+
+import math
+import warnings
+from typing import List, Optional, Tuple, Union
+
+import habana_frameworks.torch.core as htcore
+import torch
+import torch.nn.functional as F
+import torch.utils.checkpoint
+from torch import nn
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+from transformers.activations import ACT2FN
+from transformers.cache_utils import Cache
+from transformers.generation import GenerationMixin
+from transformers.integrations.deepspeed import is_deepspeed_available
+from transformers.modeling_attn_mask_utils import (
+    _prepare_4d_causal_attention_mask,
+    _prepare_4d_causal_attention_mask_for_sdpa,
+)
+from transformers.modeling_outputs import (
+    MoeCausalLMOutputWithPast,
+    MoeModelOutputWithPast,
+    SequenceClassifierOutputWithPast,
+)
+from transformers.modeling_utils import PreTrainedModel
+from transformers.utils import (
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    logging,
+    replace_return_docstrings,
+)
+
+from ..llama.modeling_llama import (
+    GaudiLlamaRotaryEmbedding,
+)
+from ..mixtral.modeling_mixtral import GaudiMixtralAttentionLongSequence
+from ..modeling_all_models import KVCache, apply_customized_rope_module
+from .configuration_arctic import ArcticConfig
+
+
+try:
+    from habana_frameworks.torch.hpex.kernels import RotaryPosEmbeddingHelperV2 as FusedRoPE
+except ImportError:
+    print("Not using HPU fused kernel for apply_rotary_pos_emb")
+    FusedRoPE = None
+
+try:
+    from habana_frameworks.torch.hpex.normalization import FusedRMSNorm
+except ImportError:
+    print("Not using HPU fused kernel for RMSNorm")
+    FusedRMSNorm = None
+
+try:
+    from habana_frameworks.torch.hpex.kernels import FusedSDPA
+except ImportError:
+    print("Not using HPU fused scaled dot-product attention kernel.")
+    FusedSDPA = None
+
+
+deepspeed_available = is_deepspeed_available()  # evaluated once, when this module is imported
+
+logger = logging.get_logger(__name__)  # module-level logger from the transformers logging utilities
+
+_CONFIG_FOR_DOC = "ArcticConfig"  # config class name as a string; presumably consumed by docstring decorators
+USE_DEEPSPEED_MOE_ARG = "use_deepspeed_moe_implementation"  # NOTE(review): this and the names below look like kwarg/config keys; confirm usage
+MOE_EXPERT_PARALLEL_SIZE_ARG = "moe_expert_parallel_size"
+DEEPSPEED_QUANTIZATION_CONFIG = "deepspeed_quantization"
+DEEPSPEED_LORA_CONFIG = "deepspeed_lora"
+QUANTIZATION_CONFIG = "ds_quantization_config"
+
+
+def load_balancing_loss_func(
+    gate_logits: torch.Tensor, num_experts: torch.Tensor = None, top_k=4, attention_mask: Optional[torch.Tensor] = None
+) -> float:
+    r"""
+    Computes auxiliary load balancing loss as in Switch Transformer - implemented in Pytorch.
+
+    See Switch Transformer (https://arxiv.org/abs/2101.03961) for more details. This function implements the loss
+    function presented in equations (4) - (6) of the paper. It aims at penalizing cases where the routing between
+    experts is too unbalanced.
+
+    Args:
+        gate_logits (Union[`torch.Tensor`, Tuple[torch.Tensor]):
+            Logits from the `gate`, should be a tuple of model.config.num_hidden_layers tensors of
+            shape [batch_size X sequence_length, num_experts].
+        num_experts (`int`, *optional*):
+            Number of experts
+        top_k (`int`, *optional*, defaults to 4):
+            Number of experts selected per token.
+        attention_mask (`torch.Tensor`, None):
+            The attention_mask used in forward function
+            shape [batch_size X sequence_length] if not None.
+
+    Returns:
+        The auxiliary loss, or `0` when `gate_logits` is `None` or not a tuple.
+    """
+    if gate_logits is None or not isinstance(gate_logits, tuple):
+        return 0
+
+    # `gate_logits` is a tuple from here on: gather every layer's logits on the first layer's device.
+    compute_device = gate_logits[0].device
+    concatenated_gate_logits = torch.cat([layer_gate.to(compute_device) for layer_gate in gate_logits], dim=0)
+
+    routing_weights = torch.nn.functional.softmax(concatenated_gate_logits, dim=-1)
+    _, selected_experts = torch.topk(routing_weights, top_k, dim=-1)
+    expert_mask = torch.nn.functional.one_hot(selected_experts, num_experts)
+
+    if attention_mask is None:
+        # Compute the percentage of tokens routed to each experts
+        tokens_per_expert = torch.mean(expert_mask.float(), dim=0)
+
+        # Compute the average probability of routing to these experts
+        router_prob_per_expert = torch.mean(routing_weights, dim=0)
+    else:
+        batch_size, sequence_length = attention_mask.shape
+        num_hidden_layers = concatenated_gate_logits.shape[0] // (batch_size * sequence_length)
+
+        # Padding mask broadcast to the shape of expert_mask; its middle axis must be `top_k`, not a fixed 2
+        expert_attention_mask = (
+            attention_mask[None, :, :, None, None]
+            .expand((num_hidden_layers, batch_size, sequence_length, top_k, num_experts))
+            .reshape(-1, top_k, num_experts)
+            .to(compute_device)
+        )
+
+        # Compute the percentage of tokens routed to each experts
+        tokens_per_expert = torch.sum(expert_mask.float() * expert_attention_mask, dim=0) / torch.sum(
+            expert_attention_mask, dim=0
+        )
+
+        # Compute the mask that masks all padding tokens as 0 with the same shape of tokens_per_expert
+        router_per_expert_attention_mask = (
+            attention_mask[None, :, :, None]
+            .expand((num_hidden_layers, batch_size, sequence_length, num_experts))
+            .reshape(-1, num_experts)
+            .to(compute_device)
+        )
+
+        # Compute the average probability of routing to these experts
+        router_prob_per_expert = torch.sum(routing_weights * router_per_expert_attention_mask, dim=0) / torch.sum(
+            router_per_expert_attention_mask, dim=0
+        )
+
+    overall_loss = torch.sum(tokens_per_expert * router_prob_per_expert.unsqueeze(0))
+    return overall_loss * num_experts
+
+
+# Copied from transformers.models.llama.modeling_llama._get_unpad_data
+def _get_unpad_data(attention_mask):
+    """Return flat indices of the non-zero mask entries, cumulative row lengths, and the longest row length.
+
+    The cumulative lengths are int32 and begin with a leading zero.
+    """
+    flat_positions = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
+    per_row_lengths = attention_mask.sum(dim=-1, dtype=torch.int32)
+    longest = per_row_lengths.max().item()
+    cumulative = F.pad(torch.cumsum(per_row_lengths, dim=0, dtype=torch.int32), (1, 0))
+    return flat_positions, cumulative, longest
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaRMSNorm with Llama->Arctic
+class ArcticRMSNorm(nn.Module):
+    """Root-mean-square layer norm with a learned per-channel scale; uses the HPU fused kernel when it can."""
+
+    def __init__(self, hidden_size, eps=1e-6):
+        """
+        ArcticRMSNorm is equivalent to T5LayerNorm
+        """
+        super().__init__()
+        self.variance_epsilon = eps
+        self.weight = nn.Parameter(torch.ones(hidden_size))
+
+    def forward(self, hidden_states):
+        """
+        Normalize over the last dimension. Mirrors gaudi_llama_rmsnorm_forward in
+        optimum/habana/transformers/models/llama/modeling_llama.py.
+        """
+        if hidden_states.device.type != "hpu" or FusedRMSNorm is None:
+            # Eager path: compute the statistics in float32, then scale in the caller's dtype.
+            caller_dtype = hidden_states.dtype
+            as_fp32 = hidden_states.to(torch.float32)
+            mean_square = as_fp32.pow(2).mean(-1, keepdim=True)
+            normalized = as_fp32 * torch.rsqrt(mean_square + self.variance_epsilon)
+            return self.weight * normalized.to(caller_dtype)
+
+        param_dtype = self.weight.dtype
+        if hidden_states.dtype == param_dtype:
+            return FusedRMSNorm.apply(hidden_states, self.weight, self.variance_epsilon)
+        # mixed dtypes are not good for FusedRMSNorm, both inputs need to have same dtype
+        fused = FusedRMSNorm.apply(hidden_states.to(param_dtype), self.weight, self.variance_epsilon)
+        return fused.to(hidden_states.dtype)
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->Arctic
+class ArcticRotaryEmbedding(nn.Module):
+    """Rotary position embedding tables (cos/sin) that are cached and rebuilt when a longer length is requested."""
+
+    def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
+        super().__init__()
+        self.dim = dim
+        self.max_position_embeddings = max_position_embeddings
+        self.base = base
+        exponents = torch.arange(0, self.dim, 2).float().to(device) / self.dim
+        self.register_buffer("inv_freq", 1.0 / (self.base**exponents), persistent=False)
+
+        # Build here to make `torch.jit.trace` work.
+        default_dtype = torch.get_default_dtype()
+        self._set_cos_sin_cache(seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=default_dtype)
+
+    def _set_cos_sin_cache(self, seq_len, device, dtype):
+        """(Re)build the cached cos/sin tables for positions `0 .. seq_len - 1`."""
+        self.max_seq_len_cached = seq_len
+        positions = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
+        angles = torch.outer(positions, self.inv_freq)
+        # Different from paper, but it uses a different permutation in order to obtain the same calculation
+        table = torch.cat((angles, angles), dim=-1)
+        self.register_buffer("_cos_cached", table.cos().to(dtype), persistent=False)
+        self.register_buffer("_sin_cached", table.sin().to(dtype), persistent=False)
+
+    def forward(self, x, seq_len=None):
+        """Return `(cos, sin)` for the first `seq_len` positions, cast to the dtype of `x`."""
+        # x: [bs, num_attention_heads, seq_len, head_size]
+        needs_growth = seq_len > self.max_seq_len_cached
+        if needs_growth:
+            self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
+        cos = self._cos_cached[:seq_len].to(dtype=x.dtype)
+        sin = self._sin_cached[:seq_len].to(dtype=x.dtype)
+        return cos, sin
+
+
+# Copied from transformers.models.llama.modeling_llama.rotate_half
+def rotate_half(x):
+    """Rotates half the hidden dims of the input."""
+    half = x.shape[-1] // 2
+    first, second = x[..., :half], x[..., half:]
+    return torch.cat((-second, first), dim=-1)
+
+
+# Copied from transformers.models.llama.modeling_llama.apply_rotary_pos_emb
+def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
+    """Rotate the query and key tensors with rotary position embeddings.
+
+    `cos` and `sin` are indexed with `position_ids`, and the result gets one extra axis inserted at
+    `unsqueeze_dim` before being multiplied with `q` and `k`.
+
+    Args:
+        q (`torch.Tensor`): The query tensor.
+        k (`torch.Tensor`): The key tensor.
+        cos (`torch.Tensor`): The cosine part of the rotary embedding.
+        sin (`torch.Tensor`): The sine part of the rotary embedding.
+        position_ids (`torch.Tensor`):
+            Indices used to select rows of `cos` and `sin` (for example offset positions when a KV cache is used).
+        unsqueeze_dim (`int`, *optional*, defaults to 1):
+            Axis inserted into the selected `cos`/`sin` so they broadcast against `q` and `k`: use 1 for a
+            [batch_size, heads, seq_len, head_dim] layout and 2 for [batch_size, seq_len, heads, head_dim].
+
+    Returns:
+        `tuple(torch.Tensor)`: the rotated query and key tensors, in that order.
+    """
+    cos_sel = cos[position_ids].unsqueeze(unsqueeze_dim)
+    sin_sel = sin[position_ids].unsqueeze(unsqueeze_dim)
+    rotated = []
+    for tensor in (q, k):
+        rotated.append((tensor * cos_sel) + (rotate_half(tensor) * sin_sel))
+    return rotated[0], rotated[1]
+
+
+# Copied from optimum/habana/transformers/models/llama/modeling_llama.py gaudi_llama_repeat_kv()
+def repeat_kv(
+    query_states: torch.Tensor,
+    key_states: torch.Tensor,
+    value_states: torch.Tensor,
+    attention_mask: torch.Tensor,
+    n_rep: int,
+):
+    """
+    Grouped-query variant of repeat_kv (https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py).
+    Rather than repeating key/value heads, the tensors are reshaped so that matmul broadcasting does the sharing:
+        - query: (batch, num_heads, seqlen, head_dim) -> (batch, num_key_value_heads, n_rep, seqlen, head_dim)
+        - key/value: (batch, num_key_value_heads, seqlen, head_dim) -> (batch, num_key_value_heads, 1, seqlen, head_dim)
+        - attention_mask (when given): one extra axis inserted at position 1
+    All inputs are returned untouched when n_rep == 1 or num_key_value_heads == 1.
+    """
+    kv_batch, num_key_value_heads, kv_len, kv_head_dim = key_states.shape
+    if n_rep == 1 or num_key_value_heads == 1:
+        return query_states, key_states, value_states, attention_mask
+
+    # Keys and values gain a singleton group axis.
+    kv_shape = (kv_batch, num_key_value_heads, 1, kv_len, kv_head_dim)
+    grouped_key = key_states.reshape(kv_shape)
+    grouped_value = value_states.reshape(kv_shape)
+
+    # Queries are split into (num_key_value_heads, n_rep).
+    q_batch, _, q_len, q_head_dim = query_states.shape
+    grouped_query = query_states.reshape(q_batch, num_key_value_heads, n_rep, q_len, q_head_dim)
+
+    # Add groups dim and set to 1
+    grouped_mask = attention_mask.unsqueeze(1) if attention_mask is not None else None
+
+    return grouped_query, grouped_key, grouped_value, grouped_mask
+
+
+# Copied from transformers.models.mistral.modeling_mistral.MistralAttention with Mistral->Arctic
+class ArcticAttention(nn.Module):
+    """
+    Multi-headed attention from 'Attention Is All You Need' paper, adapted for Gaudi: it keeps per-layer `KVCache`
+    buffers and dispatches to `FusedSDPA` when that kernel could be imported.
+    """
+
+    def __init__(self, config: ArcticConfig, layer_idx: Optional[int] = None, **kwargs):
+        super().__init__()
+        config.rope_scaling = getattr(config, "rope_scaling", None)
+        self.config = config
+        self.layer_idx = layer_idx
+        if layer_idx is None:
+            logger.warning_once(
+                f"Instantiating {self.__class__.__name__} without passing `layer_idx` is not recommended and will "
+                "lead to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` "
+                "when creating this class."
+            )
+
+        self.rotary_emb = GaudiLlamaRotaryEmbedding(config=self.config)
+        self.k_cache = KVCache()
+        self.v_cache = KVCache()
+        self.inp_seq_len = -1
+        self.block_size = 1024
+
+        self.hidden_size = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.head_dim = self.hidden_size // self.num_heads
+        self.num_key_value_heads = config.num_key_value_heads
+        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+        self.max_position_embeddings = config.max_position_embeddings
+        self.rope_theta = config.rope_theta
+        self.is_causal = True
+        self.attention_dropout = config.attention_dropout
+        if (self.head_dim * self.num_heads) != self.hidden_size:
+            raise ValueError(
+                f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
+                f" and `num_heads`: {self.num_heads})."
+            )
+
+        self.q_proj = nn.Linear(
+            self.hidden_size,
+            self.num_heads * self.head_dim,
+            bias=False,
+        )
+        self.k_proj = nn.Linear(
+            self.hidden_size,
+            self.num_key_value_heads * self.head_dim,
+            bias=False,
+        )
+        self.v_proj = nn.Linear(
+            self.hidden_size,
+            self.num_key_value_heads * self.head_dim,
+            bias=False,
+        )
+        self.o_proj = nn.Linear(
+            self.hidden_size,
+            self.hidden_size,
+            bias=False,
+        )
+
+    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
+
+    def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len):
+        """
+        Allocate KV cache. Copied from ../mixtral/modeling_mixtral.py GaudiMixtralAttention.allocate_kv_cache
+        """
+        cache_shape = (batch_size, self.num_key_value_heads, max_seq_len, self.head_dim)
+        device = self.k_proj.weight.device
+        dtype = self.config.torch_dtype
+        self.k_cache.allocate(inp_seq_len, dtype, device, cache_shape)
+        self.v_cache.allocate(inp_seq_len, dtype, device, cache_shape)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Cache] = None,
+        output_attentions: bool = False,
+        use_cache: bool = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        token_idx: Optional[torch.Tensor] = None,
+        reuse_cache: Optional[bool] = False,
+        flash_attention_recompute: Optional[bool] = False,
+        cache_idx: Optional[int] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        """
+        Adapted from ArcticAttention.forward: https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/be318cae5aba5291208f27d30991a5150500887d
+
+        Reference Gaudi implementation from ../mixtral/modeling_mixtral.py GaudiMixtralAttention
+
+        Changes made:
+        - Added new args
+            - token_idx
+            - reuse_cache
+            - flash_attention_recompute
+            - cache_idx
+        - Optimize KV cache
+        - Use FusedSDPA attention
+        - Without FusedSDPA, the eager path handles the 5-D grouped layout produced by `repeat_kv`
+        """
+        if "padding_mask" in kwargs:
+            warnings.warn(
+                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
+            )
+        bsz, q_len, _ = hidden_states.size()
+
+        query_states = self.q_proj(hidden_states)
+        key_states = self.k_proj(hidden_states)
+        value_states = self.v_proj(hidden_states)
+
+        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+        value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+
+        kv_seq_len = key_states.shape[-2]
+        if past_key_value is not None:
+            if self.layer_idx is None:
+                raise ValueError(
+                    f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} "
+                    "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class "
+                    "with a layer index."
+                )
+            if token_idx is None:
+                if hasattr(past_key_value, "get_usable_length"):
+                    kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+                else:
+                    kv_seq_len += past_key_value[0].shape[-2]
+            else:
+                if reuse_cache:
+                    kv_seq_len = past_key_value[0][-2]
+                else:
+                    kv_seq_len = past_key_value[0].shape[-2]
+        cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+        query_states, key_states = apply_customized_rope(
+            query_states, key_states, cos, sin, position_ids, self.training
+        )
+
+        if use_cache:
+            if reuse_cache:
+                key_states = self.k_cache(key_states, 2, token_idx)
+                value_states = self.v_cache(value_states, 2, token_idx)
+                past_key_value = (self.k_cache.get_shape(), self.v_cache.get_shape())
+            else:
+                if past_key_value is None:
+                    past_key = torch.zeros(key_states.shape, dtype=self.k_proj.weight.dtype, device=key_states.device)
+                    past_value = torch.zeros(
+                        key_states.shape, dtype=self.k_proj.weight.dtype, device=key_states.device
+                    )
+                    past_key_value = (past_key, past_value)
+                key_states = self.k_cache.update(past_key_value[0], key_states, 2, token_idx, self.inp_seq_len)
+                value_states = self.v_cache.update(past_key_value[1], value_states, 2, token_idx, self.inp_seq_len)
+                if token_idx is None:
+                    past_key_value = (key_states, value_states)
+
+            if cache_idx is not None and q_len == 1:
+                key_states = key_states[:, :, :cache_idx, :]
+                value_states = value_states[:, :, :cache_idx, :]
+                if attention_mask is not None:
+                    attention_mask = attention_mask[:, :, :, :cache_idx]
+                kv_seq_len = key_states.shape[-2]
+        else:
+            past_key_value = None
+
+        if FusedSDPA is not None:
+            if query_states.dtype != key_states.dtype:
+                key_states = key_states.type(query_states.dtype)
+                value_states = value_states.type(query_states.dtype)
+            # support long sequences exceeding 8192
+            if not self.training and q_len == key_states.size(-2) and q_len > 8192:
+                htcore.mark_step()
+                attn_output = GaudiMixtralAttentionLongSequence.forward(
+                    query_states,
+                    key_states,
+                    value_states,
+                    attention_mask,
+                    False,
+                    self.block_size,
+                )
+                htcore.mark_step()
+            else:
+                attn_output = FusedSDPA.apply(
+                    query_states,
+                    key_states,
+                    value_states,
+                    attention_mask,
+                    0.0,
+                    False,
+                    None,
+                    "None",
+                    flash_attention_recompute,
+                )
+        else:
+            # repeat k/v heads if n_kv_heads < n_heads
+            query_states, key_states, value_states, attention_mask = repeat_kv(
+                query_states, key_states, value_states, attention_mask, self.num_key_value_groups
+            )
+
+            # `repeat_kv` may return 5-D grouped tensors, so transpose the last two axes rather than axes 2 and 3.
+            attn_weights = torch.matmul(query_states, key_states.transpose(-2, -1)) / math.sqrt(self.head_dim)
+            # `flatten(1, -3)` merges the (kv_heads, n_rep) axes of the grouped layout and is a no-op on 4-D tensors.
+            if attn_weights.flatten(1, -3).size() != (bsz, self.num_heads, q_len, kv_seq_len):
+                raise ValueError(
+                    f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
+                    f" {attn_weights.size()}"
+                )
+
+            if attention_mask is not None:
+                if attention_mask.flatten(1, -3).size() != (bsz, 1, q_len, kv_seq_len):
+                    raise ValueError(
+                        f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+                    )
+                attn_weights = attn_weights + attention_mask
+
+            # upcast attention to fp32
+            attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
+            attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training)
+            # Fold the grouped axes back into one head axis so the transpose/reshape below sees (bsz, heads, q, dim).
+            attn_output = torch.matmul(attn_weights, value_states).flatten(1, -3)
+            attn_weights = attn_weights.flatten(1, -3)
+            if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+                raise ValueError(
+                    f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+                    f" {attn_output.size()}"
+                )
+
+        attn_output = attn_output.transpose(1, 2).contiguous()
+        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+
+        attn_output = self.o_proj(attn_output)
+
+        if not output_attentions or FusedSDPA is not None:
+            attn_weights = None
+
+        return attn_output, attn_weights, past_key_value
+
+
+class ArcticMLP(nn.Module):
+    """Feed-forward block used in this file for the dense MLP, each MoE expert, and the residual MLP."""
+
+    def __init__(
+        self,
+        config: ArcticConfig,
+        is_residual_mlp=False,
+    ):
+        """Gated feed-forward block made of three bias-free linear layers.
+
+        Args:
+            config: supplies `hidden_size`, `intermediate_size` and `hidden_act`.
+            is_residual_mlp: bool. If true, the inner width `ffn_dim` equals `hidden_size`; otherwise it is
+                `config.intermediate_size`.
+        """
+        super().__init__()
+        model_width = config.hidden_size
+        inner_width = model_width if is_residual_mlp else config.intermediate_size
+        self.hidden_dim = model_width
+        self.ffn_dim = inner_width
+
+        # Projections into the inner width (w1, w3) and back out (w2); construction order is w1, w2, w3.
+        self.w1 = nn.Linear(model_width, inner_width, bias=False)
+        self.w2 = nn.Linear(inner_width, model_width, bias=False)
+        self.w3 = nn.Linear(model_width, inner_width, bias=False)
+
+        # Activation looked up by name from `ACT2FN`.
+        self.act_fn = ACT2FN[config.hidden_act]
+
+    def forward(self, hidden_states):
+        """Compute `w2(act_fn(w1(x)) * w3(x))` for the input `x`."""
+        # Activated branch.
+        activated = self.act_fn(self.w1(hidden_states))
+        # Linear branch, multiplied element-wise with the activated one.
+        linear_branch = self.w3(hidden_states)
+        gated = activated * linear_branch
+
+        return self.w2(gated)
+
+
+class ArcticMoE(nn.Module):
+    """Feed-forward stage of a decoder layer: a routed mixture of `ArcticMLP` experts or one dense `ArcticMLP`."""
+
+    def __init__(self, config: ArcticConfig, layer_id: int, **kwargs):
+        super().__init__()
+
+        self.hidden_dim = config.hidden_size
+        self.num_experts = config.num_local_experts
+        self.layer_id = layer_id
+        self.top_k = config.num_experts_per_tok
+        # A layer is an MoE layer when `layer_id + 1` is a multiple of `moe_layer_frequency`.
+        self.is_moe_layer = (layer_id + 1) % config.moe_layer_frequency == 0
+
+        if self.is_moe_layer:
+            # "local" MoE implementation
+            self.gate = nn.Linear(self.hidden_dim, self.num_experts, bias=False)
+            self.experts = nn.ModuleList([ArcticMLP(config) for _ in range(self.num_experts)])
+        else:
+            # dense, not MoE
+            self.mlp = ArcticMLP(config)
+
+    # Similar in behavior to transformers.models.mixtral.modeling_mixtral.MixtralSparseMoeBlock.forward but more efficient.
+    def _moe_foreward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        """Run every expert on all tokens, weight by the top-k routing, and return `(output, aux_loss)`."""
+        batch_size, sequence_length, hidden_dim = hidden_states.shape
+        flat_tokens = hidden_states.view(-1, hidden_dim)
+        tensor_opts = {"dtype": flat_tokens.dtype, "device": flat_tokens.device}
+
+        # router_logits: (batch * sequence_length, n_experts)
+        router_logits = self.gate(flat_tokens)
+        router_probs = F.softmax(router_logits, dim=1, dtype=torch.float)
+        top_weights, top_experts = torch.topk(router_probs, self.top_k, dim=-1)
+        top_weights /= top_weights.sum(dim=-1, keepdim=True)
+        # we cast back to the input dtype
+        top_weights = top_weights.to(flat_tokens.dtype)
+
+        final_hidden_states = torch.zeros((batch_size, sequence_length, hidden_dim), **tensor_opts)
+
+        # Dense (tokens, experts) weight table: zero for experts a token was not routed to.
+        dense_weights = torch.zeros((batch_size * sequence_length, self.num_experts), **tensor_opts)
+        dense_weights.scatter_(-1, top_experts, top_weights)
+        # -> (experts, batch, sequence, 1)
+        dense_weights = dense_weights.reshape(-1, sequence_length, self.num_experts).permute(2, 0, 1).unsqueeze(-1)
+
+        # support long sequences exceeding 8192
+        break_graph = not self.training and sequence_length > 8192
+        expert_input = flat_tokens.reshape(-1, hidden_dim)
+
+        # Loop over all available experts in the model and perform the computation on each expert
+        for expert_idx in range(self.num_experts):
+            expert_out = self.experts[expert_idx](expert_input).reshape(-1, sequence_length, hidden_dim)
+            final_hidden_states += expert_out * dense_weights[expert_idx]
+            if break_graph:
+                htcore.mark_step()
+
+        final_hidden_states = final_hidden_states.reshape(batch_size, sequence_length, hidden_dim)
+        # ZY: let's directly output the loss to align what we have in ds
+        aux_loss = load_balancing_loss_func((router_logits,), self.num_experts, self.top_k)
+        return final_hidden_states, aux_loss
+
+    def forward(self, hidden_states: torch.Tensor):
+        """Return `(hidden_states, aux_loss)`; the dense path reports a zero loss tensor."""
+        if self.is_moe_layer:
+            return self._moe_foreward(hidden_states)
+        dense_out = self.mlp(hidden_states)
+        return dense_out, torch.tensor(0.0, device=hidden_states.device, dtype=hidden_states.dtype)
+
+
+class ArcticDecoderLayer(nn.Module):
+    """
+    A single Arctic transformer block: self-attention followed by the `ArcticMoE` feed-forward stage.
+
+    If `config.parallel_attn_mlp_res` is enabled and this layer's `ArcticMoE` reports `is_moe_layer`, the block
+    additionally owns a residual layer norm + residual MLP that run on the attention output, while the MoE branch
+    consumes the layer's original input.
+    """
+
+    def __init__(self, config: ArcticConfig, layer_idx: int, **kwargs):
+        super().__init__()
+        self.layer_idx = layer_idx
+        self.hidden_size = config.hidden_size
+        self.self_attn = ArcticAttention(config, layer_idx, **kwargs)
+        self.block_sparse_moe = ArcticMoE(config, layer_id=layer_idx, **kwargs)
+
+        norm_eps = config.rms_norm_eps
+        self.input_layernorm = ArcticRMSNorm(config.hidden_size, eps=norm_eps)
+        self.post_attention_layernorm = ArcticRMSNorm(config.hidden_size, eps=norm_eps)
+
+        # add residual only when it is moe layer
+        self.parallel_attn_mlp_res = config.parallel_attn_mlp_res and self.block_sparse_moe.is_moe_layer
+        if self.parallel_attn_mlp_res:
+            self.residual_layernorm = ArcticRMSNorm(config.hidden_size, eps=norm_eps)
+            # for the residual layer. always shard the base weight if doing deepspeed lora.
+            self.residual_mlp = ArcticMLP(config, is_residual_mlp=True)
+
+    def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len):
+        """Delegate KV-cache allocation to this layer's attention module."""
+        attention = self.self_attn
+        attention.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Tuple[torch.Tensor]] = None,
+        output_attentions: Optional[bool] = False,
+        use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        token_idx: Optional[torch.Tensor] = None,
+        reuse_cache: Optional[bool] = False,
+        flash_attention_recompute: Optional[bool] = False,
+        cache_idx: Optional[int] = None,
+        **kwargs,
+    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+        """
+        Modified from original Arctic forward
+        Changes:
+        - Add new arg cache_position
+        - Add new arg token_idx
+        - Add new arg reuse_cache
+        - Add new arg flash_attention_recompute
+        - Add new arg cache_idx
+
+        Args:
+            hidden_states (`torch.FloatTensor`): layer input; it is normalised, fed to self-attention and also
+                kept as the residual (and, in the parallel-residual variant, as the MoE input).
+            attention_mask, position_ids, past_key_value, cache_position, token_idx, reuse_cache,
+            flash_attention_recompute, cache_idx: forwarded unchanged to the attention module.
+            output_attentions (`bool`, *optional*):
+                When truthy, the attention weights returned by the attention module are added to the output.
+            use_cache (`bool`, *optional*):
+                When truthy, the key/value state returned by the attention module is added to the output.
+
+        Returns:
+            A tuple `(hidden_states, [attention weights], [present key/value], gate_loss)`; the bracketed items
+            are present only when the matching flag is truthy. `gate_loss` is always the last element.
+        """
+
+        if "padding_mask" in kwargs:
+            warnings.warn(
+                "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
+            )
+
+        layer_input = hidden_states
+
+        # Self Attention (on the normalised input), then the first residual connection
+        attn_output, self_attn_weights, present_key_value = self.self_attn(
+            hidden_states=self.input_layernorm(hidden_states),
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_value=past_key_value,
+            output_attentions=output_attentions,
+            use_cache=use_cache,
+            cache_position=cache_position,
+            token_idx=token_idx,
+            reuse_cache=reuse_cache,
+            flash_attention_recompute=flash_attention_recompute,
+            cache_idx=cache_idx,
+        )
+        post_attn = layer_input + attn_output
+
+        if self.parallel_attn_mlp_res:
+            # Note the architecture here is that the MOE layers reads the **pre-attention** input while there is a
+            # "normal" transformer residual part. This is to achieve better parallelization.
+
+            # residual mlp part: runs on the attention output
+            residual_branch = post_attn + self.residual_mlp(self.residual_layernorm(post_attn))
+            # parallel mlp moe part: normalises and consumes the layer input
+            moe_output, gate_loss = self.block_sparse_moe(self.post_attention_layernorm(layer_input))
+            hidden_states = residual_branch + moe_output
+        else:
+            moe_output, gate_loss = self.block_sparse_moe(self.post_attention_layernorm(post_attn))
+            hidden_states = post_attn + moe_output
+
+        outputs = (hidden_states,)
+        if output_attentions:
+            outputs = outputs + (self_attn_weights,)
+        if use_cache:
+            outputs = outputs + (present_key_value,)
+        return outputs + (gate_loss,)
+
+
+# Shared class-level docstring preamble; it is passed to `add_start_docstrings` when decorating the Arctic model
+# classes in this file.
+ARCTIC_START_DOCSTRING = r"""
+    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
+    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
+    etc.)
+
+    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
+    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
+    and behavior.
+
+    Parameters:
+        config ([`ArcticConfig`]):
+            Model configuration class with all the parameters of the model. Initializing with a config file does not
+            load the weights associated with the model, only the configuration. Check out the
+            [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+"""
+
+
+@add_start_docstrings(
+    "The bare Arctic Model outputting raw hidden-states without any specific head on top.",
+    ARCTIC_START_DOCSTRING,
+)
+# Copied from transformers.models.mistral.modeling_mistral.MistralPreTrainedModel with Mistral->Arctic
+class ArcticPreTrainedModel(PreTrainedModel):
+    # Class-level flags read by the `PreTrainedModel` machinery.
+    config_class = ArcticConfig
+    base_model_prefix = "model"
+    supports_gradient_checkpointing = True
+    _no_split_modules = ["ArcticDecoderLayer"]
+    _skip_keys_device_placement = "past_key_values"
+    _supports_flash_attn_2 = True
+    _supports_sdpa = True
+    _supports_cache_class = True
+
+    def _init_weights(self, module):
+        """
+        Initialise `module` in place.
+
+        `nn.Linear` and `nn.Embedding` weights are drawn from a normal distribution with mean 0 and standard
+        deviation `config.initializer_range`. A linear bias, when present, is zeroed, and so is the embedding
+        row at `padding_idx` when one is set. Every other module type is left untouched.
+        """
+        init_std = self.config.initializer_range
+
+        if not isinstance(module, (nn.Linear, nn.Embedding)):
+            return
+
+        # Both supported module types share the same weight initialisation.
+        module.weight.data.normal_(mean=0.0, std=init_std)
+
+        if isinstance(module, nn.Linear):
+            if module.bias is not None:
+                module.bias.data.zero_()
+        elif module.padding_idx is not None:
+            module.weight.data[module.padding_idx].zero_()
+
+
+# Argument documentation attached to the Arctic `forward` methods in this file through
+# `add_start_docstrings_to_model_forward`. The constant keeps a MIXTRAL prefix even though it documents Arctic
+# models; it is referenced under this name by several classes, so it is not renamed here.
+MIXTRAL_INPUTS_DOCSTRING = r"""
+    Args:
+        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+            Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
+            it.
+
+            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+            [`PreTrainedTokenizer.__call__`] for details.
+
+            [What are input IDs?](../glossary#input-ids)
+        attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+
+            [What are attention masks?](../glossary#attention-mask)
+
+            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+            [`PreTrainedTokenizer.__call__`] for details.
+
+            If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
+            `past_key_values`).
+
+            If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
+            and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
+            information on the default strategy.
+
+            - 1 indicates the head is **not masked**,
+            - 0 indicates the head is **masked**.
+        position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+            config.n_positions - 1]`.
+
+            [What are position IDs?](../glossary#position-ids)
+        past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+            Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
+            `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of shape
+            `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
+
+            Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
+            blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
+
+            If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
+            don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
+            `decoder_input_ids` of shape `(batch_size, sequence_length)`.
+        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
+            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
+            model's internal embedding lookup matrix.
+        use_cache (`bool`, *optional*):
+            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
+            `past_key_values`).
+        output_attentions (`bool`, *optional*):
+            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
+            tensors for more detail.
+        output_hidden_states (`bool`, *optional*):
+            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+            more detail.
+        return_dict (`bool`, *optional*):
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+
+
+@add_start_docstrings(
+    "The bare Arctic Model outputting raw hidden-states without any specific head on top.",
+    ARCTIC_START_DOCSTRING,
+)
+# Copied from transformers.models.mistral.modeling_mistral.MistralModel with MISTRAL->MIXTRAL,Mistral->Arctic
+class ArcticModel(ArcticPreTrainedModel):
+    """
+    Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`ArcticDecoderLayer`]
+
+    Args:
+        config: ArcticConfig
+    """
+
+    def __init__(self, config: ArcticConfig, **kwargs):
+        super().__init__(config)
+        self.padding_idx = config.pad_token_id
+        self.vocab_size = config.vocab_size
+
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
+        # Extra keyword arguments are handed to every decoder layer unchanged.
+        self.layers = nn.ModuleList(
+            [ArcticDecoderLayer(config, layer_idx, **kwargs) for layer_idx in range(config.num_hidden_layers)]
+        )
+        self._attn_implementation = config._attn_implementation
+        self.norm = ArcticRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+        # NOTE(review): checkpointing is switched on by default here. In training mode `forward` then calls
+        # `self._gradient_checkpointing_func`, which this class never assigns itself -- confirm that callers
+        # always enable gradient checkpointing through the `PreTrainedModel` API before training.
+        self.gradient_checkpointing = True
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len):
+        """Ask every decoder layer to allocate its KV cache with the given sizes."""
+        for layer in self.layers:
+            layer.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len)
+
+    def get_input_embeddings(self):
+        """Return the token embedding module."""
+        return self.embed_tokens
+
+    def set_input_embeddings(self, value):
+        """Replace the token embedding module."""
+        self.embed_tokens = value
+
+    # Ignore copy
+    @add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING)
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        cache_position: Optional[torch.LongTensor] = None,
+        token_idx: Optional[torch.Tensor] = None,
+        reuse_cache: Optional[bool] = False,
+        flash_attention_recompute: Optional[bool] = False,
+        cache_idx: int = None,
+    ) -> Union[Tuple, MoeModelOutputWithPast]:
+        """
+        Modified from original Arctic forward
+        Changes:
+        - Add new arg cache_position
+        - Add new arg token_idx
+        - Add new arg reuse_cache
+        - Add new arg flash_attention_recompute
+        - Add new arg cache_idx
+        - Force legacy KV cache
+
+        Returns:
+            A `MoeModelOutputWithPast` (or, when `return_dict` is falsy, a tuple of its non-`None` fields in the
+            same order). The `router_logits` field carries the per-layer auxiliary losses returned by the decoder
+            layers, not raw router logits.
+
+        Raises:
+            ValueError: if both or neither of `input_ids` / `inputs_embeds` are given, or if right-padded batches
+                are used with the `flash_attention_2` implementation while caching.
+        """
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        # retrieve input_ids and inputs_embeds
+        if input_ids is not None and inputs_embeds is not None:
+            raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
+        elif input_ids is not None:
+            batch_size, seq_length = input_ids.shape
+        elif inputs_embeds is not None:
+            batch_size, seq_length, _ = inputs_embeds.shape
+        else:
+            raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
+
+        past_key_values_length = 0
+
+        if self.gradient_checkpointing and self.training:
+            if use_cache:
+                logger.warning_once(
+                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                )
+                use_cache = False
+
+        # NOTE: Forcing legacy cache for HPU
+        if past_key_values is not None and use_cache:
+            if reuse_cache:
+                # With reuse_cache the entry is indexed directly, i.e. it is treated as a shape rather than a
+                # tensor.
+                past_key_values_length = past_key_values[0][0][2]
+            else:
+                past_key_values_length = past_key_values[0][0].shape[2]
+
+        # From here on `position_ids` is never None.
+        if position_ids is None:
+            device = input_ids.device if input_ids is not None else inputs_embeds.device
+            position_ids = torch.arange(
+                past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
+            )
+            position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
+        else:
+            position_ids = position_ids.view(-1, seq_length).long()
+
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
+
+        if cache_position is None:
+            past_seen_tokens = 0
+            if past_key_values is not None:
+                if isinstance(past_key_values, Cache):
+                    past_seen_tokens = past_key_values.get_seq_length()
+                else:
+                    first_entry = past_key_values[0][0]
+                    if hasattr(first_entry, "shape"):
+                        past_seen_tokens = first_entry.shape[2]
+                    else:
+                        # Entry is already a plain shape (the reuse_cache layout handled above); calling
+                        # `.shape` on it would raise, so index it directly.
+                        past_seen_tokens = first_entry[2]
+
+            cache_position = torch.arange(
+                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
+            )
+
+        if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
+            is_padding_right = attention_mask[:, -1].sum().item() != batch_size
+            if is_padding_right:
+                raise ValueError(
+                    "You are attempting to perform batched generation with padding_side='right'"
+                    " this may lead to unexpected behaviour for Flash Attention version of Arctic. Make sure to "
+                    " call `tokenizer.padding_side  = 'left'` before tokenizing the input. "
+                )
+
+        if self._attn_implementation == "flash_attention_2":
+            # 2d mask is passed through the layers
+            attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
+        elif self._attn_implementation == "sdpa" and not output_attentions:
+            # output_attentions=True can not be supported when using SDPA, and we fall back on
+            # the manual implementation that requires a 4D causal mask in all cases.
+            attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
+                attention_mask,
+                (batch_size, seq_length),
+                inputs_embeds,
+                past_key_values_length,
+            )
+        else:
+            # 4d mask is passed through the layers
+            attention_mask = _prepare_4d_causal_attention_mask(
+                attention_mask,
+                (batch_size, seq_length),
+                inputs_embeds,
+                past_key_values_length,
+                sliding_window=self.config.sliding_window,
+            )
+
+        hidden_states = inputs_embeds
+
+        # decoder layers
+        all_hidden_states = () if output_hidden_states else None
+        all_self_attns = () if output_attentions else None
+        all_router_losses = ()
+        next_decoder_cache = () if use_cache else None
+
+        for i, decoder_layer in enumerate(self.layers):
+            if output_hidden_states:
+                all_hidden_states += (hidden_states,)
+
+            # Each layer only ever sees its own cache entry.
+            layer_past = None if past_key_values is None else past_key_values[i]
+
+            if self.gradient_checkpointing and self.training:
+                # The checkpoint wrapper takes positional arguments, so they are listed in the order of
+                # `ArcticDecoderLayer.forward`'s signature; this keeps the same per-layer inputs as the
+                # non-checkpointed call below.
+                layer_outputs = self._gradient_checkpointing_func(
+                    decoder_layer.__call__,
+                    hidden_states,
+                    attention_mask,
+                    position_ids,
+                    layer_past,
+                    output_attentions,
+                    use_cache,
+                    cache_position,
+                    token_idx,
+                    reuse_cache,
+                    flash_attention_recompute,
+                    cache_idx,
+                )
+            else:
+                layer_outputs = decoder_layer(
+                    hidden_states,
+                    attention_mask=attention_mask,
+                    position_ids=position_ids,
+                    past_key_value=layer_past,
+                    output_attentions=output_attentions,
+                    use_cache=use_cache,
+                    cache_position=cache_position,
+                    token_idx=token_idx,
+                    reuse_cache=reuse_cache,
+                    flash_attention_recompute=flash_attention_recompute,
+                    cache_idx=cache_idx,
+                )
+
+            hidden_states = layer_outputs[0]
+
+            if use_cache:
+                # The cache entry follows the attention weights when those are also returned.
+                next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
+
+            if output_attentions:
+                all_self_attns += (layer_outputs[1],)
+
+            # The layer's auxiliary loss is always its last output.
+            all_router_losses += (layer_outputs[-1],)
+            htcore.mark_step()
+        hidden_states = self.norm(hidden_states)
+
+        # add hidden states from the last decoder layer
+        if output_hidden_states:
+            all_hidden_states += (hidden_states,)
+
+        next_cache = None
+        if use_cache:
+            next_cache = (
+                next_decoder_cache.to_legacy_cache() if isinstance(next_decoder_cache, Cache) else next_decoder_cache
+            )
+
+        if not return_dict:
+            return tuple(
+                v
+                for v in [hidden_states, next_cache, all_hidden_states, all_self_attns, all_router_losses]
+                if v is not None
+            )
+        return MoeModelOutputWithPast(
+            last_hidden_state=hidden_states,
+            past_key_values=next_cache,
+            hidden_states=all_hidden_states,
+            attentions=all_self_attns,
+            router_logits=all_router_losses,
+        )
+
+
+class ArcticForCausalLM(ArcticPreTrainedModel, GenerationMixin):
+    """Arctic decoder (`ArcticModel`) with a bias-free linear language-modeling head on top."""
+
+    # TODO(jeffra): update _keys_to_ignore_on_load_unexpected with expert keys not relevant for this rank
+    # Two separate patterns: without the comma the adjacent raw strings would be concatenated into a single
+    # regex that matches neither the expert weights nor the gate weight.
+    _keys_to_ignore_on_load_unexpected = [
+        r"model\.layers\.\d+\.block_sparse_moe\.experts\.\d+\.w\d+\.weight",
+        r"model\.layers\.\d+\.block_sparse_moe\.gate\.weight",
+    ]
+    _keys_to_ignore_on_load_missing = [
+        r"model\.layers\.\d+\.block_sparse_moe\.mlp\.deepspeed_moe\.experts\.deepspeed_experts\.\d+\.w\d+\.weight",
+        r"model\.layers\.\d+\.block_sparse_moe\.mlp\.deepspeed_moe\.gate\.wg\.weight",
+    ]
+    _tied_weights_keys = []  # ["lm_head.weight"]
+
+    def __init__(self, config: ArcticConfig, **kwargs):
+        super().__init__(config)
+        # Extra keyword arguments are passed through to the decoder and also read below.
+        self.model = ArcticModel(config, **kwargs)
+        self.vocab_size = config.vocab_size
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+        self.router_aux_loss_coef = config.router_aux_loss_coef
+        self.num_experts = config.num_local_experts
+        self.num_experts_per_tok = config.num_experts_per_tok
+        self.use_deepspeed_moe = kwargs.get(USE_DEEPSPEED_MOE_ARG, False)
+        self.moe_expert_parallel_size = kwargs.get(MOE_EXPERT_PARALLEL_SIZE_ARG, 1)
+        self.is_deepspeed_lora = kwargs.get(DEEPSPEED_LORA_CONFIG) is not None
+        self.gradient_checkpointing = True
+        # self.shard_base_weights_if_doing_lora = kwargs.get("shard_base_weights_if_doing_lora", False)
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len):
+        """Allocate the decoder's KV cache and remember `max_seq_len` as `kv_cache_len`."""
+        self.model.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len)
+        self.kv_cache_len = max_seq_len
+
+    def get_input_embeddings(self):
+        """Return the decoder's token embedding module."""
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        """Replace the decoder's token embedding module."""
+        self.model.embed_tokens = value
+
+    def get_output_embeddings(self):
+        """Return the language-modeling head."""
+        return self.lm_head
+
+    def set_output_embeddings(self, new_embeddings):
+        """Replace the language-modeling head."""
+        self.lm_head = new_embeddings
+
+    def set_decoder(self, decoder):
+        """Replace the wrapped decoder."""
+        self.model = decoder
+
+    def get_decoder(self):
+        """Return the wrapped decoder."""
+        return self.model
+
+    @add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING)
+    @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
+    # Ignore copy
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+        cache_position: Optional[torch.LongTensor] = None,
+        token_idx: Optional[torch.Tensor] = None,
+        reuse_cache: Optional[bool] = None,
+        flash_attention_recompute: Optional[bool] = False,
+        cache_idx: int = None,
+    ) -> Union[Tuple, MoeCausalLMOutputWithPast]:
+        r"""
+        Modified from original. Only differences are:
+        - Add new arg cache_position
+        - Add new arg token_idx
+        - Add new arg reuse_cache
+        - Add new arg flash_attention_recompute
+        - Add new arg cache_idx
+
+        Args:
+            labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+                Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+                config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
+                (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+
+        Returns:
+
+        Example:
+
+        ```python
+        >>> from transformers import AutoTokenizer, ArcticForCausalLM
+
+        >>> model = ArcticForCausalLM.from_pretrained(PATH_TO_CONVERTED_WEIGHTS)
+        >>> tokenizer = AutoTokenizer.from_pretrained(PATH_TO_CONVERTED_TOKENIZER)
+
+        >>> prompt = "Hey, are you conscious? Can you talk to me?"
+        >>> inputs = tokenizer(prompt, return_tensors="pt")
+
+        >>> # Generate
+        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
+        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
+        ```"""
+
+        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+        )
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
+        outputs = self.model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            cache_position=cache_position,
+            token_idx=token_idx,
+            reuse_cache=reuse_cache,
+            flash_attention_recompute=flash_attention_recompute,
+            cache_idx=cache_idx,
+        )
+        hidden_states = outputs[0]
+        logits = self.lm_head(hidden_states)
+        logits = logits.float()
+
+        loss = None
+        if labels is not None:
+            # Shift so that tokens < n predict n
+            shift_logits = logits[..., :-1, :].contiguous()
+            shift_labels = labels[..., 1:].contiguous()
+            # Flatten the tokens
+            loss_fct = CrossEntropyLoss()
+            shift_logits = shift_logits.view(-1, self.config.vocab_size)
+            shift_labels = shift_labels.view(-1)
+            # Enable model parallelism
+            shift_labels = shift_labels.to(shift_logits.device)
+            loss = loss_fct(shift_logits, shift_labels)
+
+        # The decoder's last output is the tuple of per-layer auxiliary losses.
+        # Move to same device for model parallelism.
+        aux_loss = sum([out.to(logits.device) for out in outputs[-1]])
+        if labels is not None:
+            loss += self.router_aux_loss_coef * aux_loss
+
+        if not return_dict:
+            output = (logits,) + outputs[1:]
+            # torch.distributed.barrier()
+            return (loss,) + output if loss is not None else output
+
+        return MoeCausalLMOutputWithPast(
+            loss=loss,
+            aux_loss=aux_loss,
+            logits=logits,
+            past_key_values=outputs.past_key_values,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
+
+    def prepare_inputs_for_generation(
+        self,
+        input_ids,
+        past_key_values=None,
+        attention_mask=None,
+        inputs_embeds=None,
+        cache_position=None,
+        position_ids=None,
+        use_cache=True,
+        num_logits_to_keep=None,
+        **kwargs,
+    ):
+        """
+        Copied from GaudiMixtralForCausalLM in optimum/habana/transformers/models/mixtral/modeling_mixtral.py
+
+        Builds the keyword arguments for one generation step: trims `input_ids` (and, on the first `reuse_cache`
+        step, `attention_mask`) to the tokens that still need processing, derives `position_ids` from the
+        attention mask when they are not supplied, and forwards `token_idx`, `reuse_cache`,
+        `flash_attention_recompute` and `cache_idx` read from `kwargs`.
+
+        Returns:
+            `dict` of model inputs for the step.
+        """
+        reuse_cache = kwargs.get("reuse_cache")
+        token_idx = kwargs.get("token_idx", None)
+
+        # Omit tokens covered by past_key_values
+        if past_key_values is not None:
+            if token_idx is not None:
+                idx = token_idx + kwargs.get("inputs_embeds_offset", 0) - 1
+                input_ids = torch.index_select(input_ids, 1, idx)
+            else:
+                if inputs_embeds is not None:  # Exception 1
+                    input_ids = input_ids[:, -cache_position.shape[0] :]
+                elif (
+                    input_ids.shape[1] != cache_position.shape[0]
+                ):  # Default case (the "else", a no op, is Exception 2)
+                    input_ids = input_ids[:, cache_position]
+        elif reuse_cache and token_idx is not None:
+            # With reuse_cache, KV cache is pre allocated hence for the 1st token we can slice the inputs till token idx for the fwd pass
+            input_ids = input_ids[:, :token_idx]
+            # `attention_mask` is optional; slicing `None` would raise a TypeError.
+            if attention_mask is not None:
+                attention_mask = attention_mask[:, :token_idx]
+
+        if attention_mask is not None and position_ids is None:
+            # create position_ids on the fly for batch generation
+            position_ids = attention_mask.long().cumsum(-1) - 1
+            position_ids.masked_fill_(attention_mask == 0, 1)
+            if past_key_values:
+                if token_idx is not None:
+                    position_ids = torch.index_select(position_ids, 1, token_idx - 1)
+                else:
+                    position_ids = position_ids[:, -input_ids.shape[1] :]
+
+        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
+        if inputs_embeds is not None and past_key_values is None:
+            model_inputs = {"inputs_embeds": inputs_embeds}
+        else:
+            model_inputs = {"input_ids": input_ids.contiguous()}  # `contiguous()` needed for compilation use cases
+
+        if num_logits_to_keep is not None:
+            model_inputs["num_logits_to_keep"] = num_logits_to_keep
+
+        model_inputs.update(
+            {
+                "position_ids": position_ids,
+                "cache_position": cache_position,
+                "past_key_values": past_key_values,
+                "use_cache": use_cache,
+                "attention_mask": attention_mask,
+                "token_idx": token_idx,
+                "reuse_cache": reuse_cache,
+                "flash_attention_recompute": kwargs.get("flash_attention_recompute"),
+                "cache_idx": kwargs.get("cache_idx"),
+            }
+        )
+        return model_inputs
+
+    @staticmethod
+    def _reorder_cache(past_key_values, beam_idx):
+        """Return a new cache tuple in which every state of every layer is re-indexed along dim 0 by `beam_idx`."""
+        reordered_past = ()
+        for layer_past in past_key_values:
+            reordered_past += (
+                tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
+            )
+        return reordered_past
+
+
+@add_start_docstrings(
+    """
+    The Arctic Model transformer with a sequence classification head on top (linear layer).
+
+    [`ArcticForSequenceClassification`] uses the last token in order to do the classification, as other causal models
+    (e.g. GPT-2) do.
+
+    Since it does classification on the last token, it requires to know the position of the last token. If a
+    `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If
+    no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the
+    padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take the last value in
+    each row of the batch).
+    """,
+    ARCTIC_START_DOCSTRING,
+)
+# Copied from transformers.models.llama.modeling_llama.LlamaForSequenceClassification with Llama->Arctic, LLAMA->MIXTRAL
+class ArcticForSequenceClassification(ArcticPreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.model = ArcticModel(config)
+        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)  # bias-free linear classification head
+
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def get_input_embeddings(self):
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        self.model.embed_tokens = value
+
+    @add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING)
+    def forward(
+        self,
+        input_ids: torch.LongTensor = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[List[torch.FloatTensor]] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
+            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+        transformer_outputs = self.model(
+            input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            past_key_values=past_key_values,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        hidden_states = transformer_outputs[0]  # first entry of the backbone output
+        logits = self.score(hidden_states)  # scored for every position; one position per row is picked below
+
+        if input_ids is not None:  # batch size is read from whichever input was provided
+            batch_size = input_ids.shape[0]
+        else:
+            batch_size = inputs_embeds.shape[0]
+
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
+        if self.config.pad_token_id is None:
+            sequence_lengths = -1  # no pad token configured: pool the last position
+        else:
+            if input_ids is not None:
+                # first pad index minus 1; no pad in a row gives -1, which the modulo below wraps (ONNX compatibility)
+                sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
+                sequence_lengths = sequence_lengths % input_ids.shape[-1]
+                sequence_lengths = sequence_lengths.to(logits.device)
+            else:
+                sequence_lengths = -1  # padding cannot be located from inputs_embeds: pool the last position
+
+        pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]
+
+        loss = None
+        if labels is not None:
+            labels = labels.to(logits.device)
+            if self.config.problem_type is None:  # inferred from num_labels / label dtype and stored on the config
+                if self.num_labels == 1:
+                    self.config.problem_type = "regression"
+                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
+                    self.config.problem_type = "single_label_classification"
+                else:
+                    self.config.problem_type = "multi_label_classification"
+
+            if self.config.problem_type == "regression":
+                loss_fct = MSELoss()
+                if self.num_labels == 1:
+                    loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(pooled_logits, labels)
+            elif self.config.problem_type == "single_label_classification":
+                loss_fct = CrossEntropyLoss()
+                loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
+            elif self.config.problem_type == "multi_label_classification":
+                loss_fct = BCEWithLogitsLoss()
+                loss = loss_fct(pooled_logits, labels)
+        if not return_dict:  # tuple form: optional loss, pooled logits, then the remaining backbone outputs
+            output = (pooled_logits,) + transformer_outputs[1:]
+            return ((loss,) + output) if loss is not None else output
+
+        return SequenceClassifierOutputWithPast(
+            loss=loss,
+            logits=pooled_logits,
+            past_key_values=transformer_outputs.past_key_values,
+            hidden_states=transformer_outputs.hidden_states,
+            attentions=transformer_outputs.attentions,
+        )
+
+
+# Copied from optimum.habana.transformers.models.llama.modeling_llama:apply_customized_rope()
+def apply_customized_rope(q, k, cos, sin, position_ids, training=True):
+    """Use `apply_customized_rope_module` on HPU if `FusedRoPE` is not None, else `apply_rotary_pos_emb`."""
+    if q.device.type != "hpu" or FusedRoPE is None:
+        # keep the same implementation as Transformers v4.37.2
+        return apply_rotary_pos_emb(q, k, cos[position_ids], sin[position_ids], position_ids)
+    return apply_customized_rope_module(q, k, cos, sin, position_ids, training)
diff --git a/optimum/habana/transformers/models/snowflake/tokenization_arctic.py b/optimum/habana/transformers/models/snowflake/tokenization_arctic.py
new file mode 100644
index 0000000000..8fbe2463b6
--- /dev/null
+++ b/optimum/habana/transformers/models/snowflake/tokenization_arctic.py
@@ -0,0 +1,56 @@
+"""Tokenization classes for Arctic. Copied from https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/be318cae5aba5291208f27d30991a5150500887d."""
+
+from typing import Any, Dict, Optional
+
+from transformers.models.llama.tokenization_llama import LlamaTokenizer
+
+
+class ArcticTokenizer(LlamaTokenizer):
+    def __init__(
+        self,
+        vocab_file,
+        unk_token="<unk>",
+        bos_token="<s>",
+        eos_token="</s>",
+        pad_token=None,
+        sp_model_kwargs: Optional[Dict[str, Any]] = None,
+        add_bos_token=True,
+        add_eos_token=False,
+        clean_up_tokenization_spaces=False,
+        use_default_system_prompt=False,
+        spaces_between_special_tokens=False,
+        legacy=False,
+        add_prefix_space=True,
+        **kwargs,
+    ):
+        # Every argument is forwarded untouched to LlamaTokenizer; only the `legacy=False` default differs.
+        llama_options = {
+            "bos_token": bos_token,
+            "eos_token": eos_token,
+            "unk_token": unk_token,
+            "pad_token": pad_token,
+            "sp_model_kwargs": sp_model_kwargs,
+            "add_bos_token": add_bos_token,
+            "add_eos_token": add_eos_token,
+            "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
+            "use_default_system_prompt": use_default_system_prompt,
+            "spaces_between_special_tokens": spaces_between_special_tokens,
+            "legacy": legacy,
+            "add_prefix_space": add_prefix_space,
+        }
+        super().__init__(vocab_file, **llama_options, **kwargs)
+
+    @property
+    def default_chat_template(self):
+        """Chat template that formats inputs in the standard Arctic format, with an optional generation prompt."""
+        per_message = (
+            "{% for message in messages %}"
+            "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
+            "{% endfor %}"
+        )
+        generation_prompt = (
+            "{% if add_generation_prompt %}"
+            "{{ '<|im_start|>assistant\n' }}"
+            "{% endif %}"
+        )
+        return per_message + generation_prompt
diff --git a/optimum/habana/transformers/models/video_llava/__init__.py b/optimum/habana/transformers/models/video_llava/__init__.py
index 5c5f894d56..57831502dc 100644
--- a/optimum/habana/transformers/models/video_llava/__init__.py
+++ b/optimum/habana/transformers/models/video_llava/__init__.py
@@ -1,2 +1 @@
 from .modeling_video_llava import GaudiVideoLlavaForConditionalGeneration
-from .processing_video_llava import GaudiVideoLlavaProcessor
diff --git a/optimum/habana/transformers/models/video_llava/modeling_video_llava.py b/optimum/habana/transformers/models/video_llava/modeling_video_llava.py
index 6670b23375..ec9a10f053 100644
--- a/optimum/habana/transformers/models/video_llava/modeling_video_llava.py
+++ b/optimum/habana/transformers/models/video_llava/modeling_video_llava.py
@@ -18,148 +18,17 @@
 
 import torch
 from torch import nn
-from transformers.modeling_outputs import BaseModelOutputWithPooling
 from transformers.models.video_llava.modeling_video_llava import (
     VideoLlavaCausalLMOutputWithPast,
-    VideoLlavaConfig,
     VideoLlavaForConditionalGeneration,
 )
-from transformers.utils import logging
+from transformers.utils import is_torchdynamo_compiling, logging
 
 
 logger = logging.get_logger(__name__)
 
 
 class GaudiVideoLlavaForConditionalGeneration(VideoLlavaForConditionalGeneration):
-    def __init__(self, config: VideoLlavaConfig):
-        super().__init__(config)
-        self.feature_offset = 0
-
-    def _merge_input_ids_with_visual_features(
-        self, visual_features, inputs_embeds, input_ids, attention_mask, labels, token_idx, num_frames=1
-    ):
-        r"""
-        Copied from VideoLlavaForConditionalGeneration._merge_input_ids_with_visual_features: https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/video_llava/modeling_video_llava.py
-        The only differences are:
-        - add new args token_idx
-        - add self.feature_offset param
-        """
-        num_images, num_image_patches, embed_dim = visual_features.shape
-        batch_size, sequence_length = input_ids.shape
-        last_token_idx = token_idx + self.feature_offset
-        left_padding = not torch.sum(input_ids[:, last_token_idx - 1] == torch.tensor(self.pad_token_id))
-        special_vision_token = self.config.video_token_index if num_frames > 1 else self.config.image_token_index
-
-        # 1. Create a mask to know where special image tokens are
-        special_image_token_mask = input_ids == special_vision_token
-        num_special_image_tokens = torch.sum(special_image_token_mask, dim=-1)
-        # Compute the maximum embed dimension
-        max_seq_len = (num_special_image_tokens.max() * (num_image_patches * num_frames - 1)) + sequence_length
-        self.feature_offset = self.feature_offset + max_seq_len - sequence_length
-        batch_indices, non_image_indices = torch.where(input_ids != special_vision_token)
-
-        # 2. Compute the positions where text should be written
-        # Calculate new positions for text tokens in merged image-text sequence.
-        # `special_image_token_mask` identifies image tokens. Each image token will be replaced by `nb_text_tokens_per_images - 1` text tokens.
-        # `torch.cumsum` computes how each image token shifts subsequent text token positions.
-        # - 1 to adjust for zero-based indexing, as `cumsum` inherently increases indices by one.
-        new_token_positions = (
-            torch.cumsum((special_image_token_mask * (num_image_patches * num_frames - 1) + 1), dim=-1) - 1
-        )
-        nb_image_pad = max_seq_len - 1 - new_token_positions[:, -1]
-        if left_padding:
-            new_token_positions += nb_image_pad[:, None]  # offset for left padding
-        text_to_overwrite = new_token_positions[batch_indices, non_image_indices]
-
-        # 3. Create the full embedding, already padded to the maximum position
-        # expand input ids so that the second "merge" with videos does not fail
-        final_embedding = torch.zeros(
-            batch_size, max_seq_len, embed_dim, dtype=inputs_embeds.dtype, device=inputs_embeds.device
-        )
-        final_attention_mask = torch.zeros(
-            batch_size, max_seq_len, dtype=attention_mask.dtype, device=inputs_embeds.device
-        )
-        final_input_ids = torch.full(
-            (batch_size, max_seq_len), self.pad_token_id, dtype=input_ids.dtype, device=inputs_embeds.device
-        )
-        # In case the Vision model or the Language model has been offloaded to CPU, we need to manually
-        # set the corresponding tensors into their correct target device.
-        target_device = inputs_embeds.device
-        batch_indices, non_image_indices, text_to_overwrite = (
-            batch_indices.to(target_device),
-            non_image_indices.to(target_device),
-            text_to_overwrite.to(target_device),
-        )
-        attention_mask = attention_mask.to(target_device)
-
-        # 4. Fill the embeddings based on the mask. If we have ["hey" "<image>", "how", "are"]
-        # we need to index copy on [0, 577, 578, 579] for the text and [1:576] for the image features
-        final_embedding[batch_indices, text_to_overwrite] = inputs_embeds[batch_indices, non_image_indices]
-        final_attention_mask[batch_indices, text_to_overwrite] = attention_mask[batch_indices, non_image_indices]
-        final_input_ids[batch_indices, text_to_overwrite] = input_ids[batch_indices, non_image_indices]
-        if labels is not None:
-            final_labels = torch.full(
-                (batch_size, max_seq_len), self.config.ignore_index, dtype=input_ids.dtype, device=input_ids.device
-            )
-            final_labels[batch_indices, text_to_overwrite] = labels[batch_indices, non_image_indices]
-        else:
-            final_labels = None
-
-        # 5. Fill the embeddings corresponding to the images. Anything that is still zeros needs filling
-        image_to_overwrite = torch.full((batch_size, max_seq_len), True, dtype=torch.bool, device=inputs_embeds.device)
-        image_to_overwrite[batch_indices, text_to_overwrite] = False
-        image_to_overwrite &= image_to_overwrite.cumsum(-1) - 1 >= nb_image_pad[:, None].to(target_device)
-
-        if image_to_overwrite.sum() != visual_features.shape[:-1].numel():
-            visual_type = "videos" if num_frames == 8 else "images"
-            num_images //= num_frames
-            raise ValueError(
-                f"The input provided to the model are wrong. The number of {visual_type} tokens is {torch.sum(special_image_token_mask)} while"
-                f" the number of {visual_type} given to the model is {num_images}. This prevents correct indexing and breaks batch generation."
-            )
-
-        final_embedding[image_to_overwrite] = visual_features.contiguous().reshape(-1, embed_dim).to(target_device)
-        final_attention_mask |= image_to_overwrite
-        position_ids = (final_attention_mask.cumsum(-1) - 1).masked_fill_((final_attention_mask == 0), 1)
-
-        return final_embedding, final_attention_mask, final_labels, position_ids, final_input_ids
-
-    def _get_vision_features(
-        self,
-        pixel_values_images: Optional[torch.FloatTensor] = None,
-        pixel_values_videos: Optional[torch.FloatTensor] = None,
-        vision_feature_layer: Optional[int] = None,
-        vision_feature_select_strategy: Optional[str] = None,
-    ) -> Union[Tuple, BaseModelOutputWithPooling]:
-        if pixel_values_images is None and pixel_values_videos is None:
-            raise ValueError("You have to specify `pixel_values_images` or `pixel_values_videos`")
-
-        # videos do not need to select features and it's always "full" (as it is done in the orig implementation)
-        if pixel_values_videos is not None:
-            batch_size_vid, num_frames, channels, height, width = pixel_values_videos.shape
-
-            pixel_values = pixel_values_videos.reshape(batch_size_vid * num_frames, channels, height, width)
-            video_outputs = self.video_tower(pixel_values, output_hidden_states=True)
-            video_outputs = video_outputs.hidden_states[vision_feature_layer].squeeze(1)
-        else:
-            video_outputs = None
-            num_frames = 0
-
-        if pixel_values_images is not None:
-            image_outputs = self.image_tower(pixel_values_images, output_hidden_states=True)
-            image_outputs = image_outputs.hidden_states[vision_feature_layer].squeeze(1)
-
-            if vision_feature_select_strategy == "default":
-                image_outputs = image_outputs[:, 1:]
-            elif vision_feature_select_strategy == "full":
-                image_outputs = image_outputs
-            else:
-                raise ValueError(f"Unexpected select feature strategy: {self.config.vision_feature_select_strategy}")
-        else:
-            image_outputs = None
-
-        return image_outputs, video_outputs, num_frames
-
     def forward(
         self,
         input_ids: Optional[torch.LongTensor] = None,
@@ -179,18 +48,12 @@ def forward(
         cache_position: Optional[torch.LongTensor] = None,
         logits_to_keep: Union[int, torch.Tensor] = 0,
         token_idx: Optional[torch.Tensor] = None,
-        **kwargs,
+        **lm_kwargs,
     ) -> Union[Tuple, VideoLlavaCausalLMOutputWithPast]:
         r"""
-        Copied from VideoLlavaForConditionalGeneration.forward: https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/video_llava/modeling_video_llava.py
-        The only differences are:
+        Copied from VideoLlavaForConditionalGeneration.forward: https://github.com/huggingface/transformers/blob/v4.51.3/src/transformers/models/video_llava/modeling_video_llava.py#L365
+        The only difference is:
         - add new args token_idx
-        - add new args attn_softmax_bf16
-        - add new args reuse_cache
-        - add new args use_flash_attention
-        - add new args flash_attention_recompute
-        - add new args flash_attention_causal_mask
-        - add new args flash_attention_fast_softmax
         """
 
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -198,6 +61,59 @@ def forward(
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        vision_feature_layer = (
+            vision_feature_layer if vision_feature_layer is not None else self.config.vision_feature_layer
+        )
+        vision_feature_select_strategy = (
+            vision_feature_select_strategy
+            if vision_feature_select_strategy is not None
+            else self.config.vision_feature_select_strategy
+        )
+
+        if (input_ids is None) ^ (inputs_embeds is not None):
+            raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
+
+        if (pixel_values_images is not None or pixel_values_videos is not None) and inputs_embeds is not None:
+            raise ValueError(
+                "You cannot specify both `pixel_values_images`/`pixel_values_videos` and `inputs_embeds` at the same "
+                "time, and must specify either one"
+            )
+
+        if inputs_embeds is None:
+            inputs_embeds = self.get_input_embeddings()(input_ids)
+
+        if pixel_values_images is not None:
+            image_features = self.get_image_features(
+                pixel_values_images,
+                vision_feature_layer=vision_feature_layer,
+                vision_feature_select_strategy=vision_feature_select_strategy,
+            )
+            special_image_mask = (input_ids == self.config.image_token_index).unsqueeze(-1)
+            special_image_mask = special_image_mask.expand_as(inputs_embeds).to(inputs_embeds.device)
+            if not is_torchdynamo_compiling() and inputs_embeds[special_image_mask].numel() != image_features.numel():
+                n_image_tokens = (input_ids == self.config.image_token_index).sum()
+                n_image_features = image_features.shape[0] * image_features.shape[1]
+                raise ValueError(
+                    f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}"
+                )
+            image_features = image_features.to(inputs_embeds.device, inputs_embeds.dtype)
+            inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_features)
+
+        if pixel_values_videos is not None:
+            video_features, num_frames = self.get_video_features(
+                pixel_values_videos=pixel_values_videos, vision_feature_layer=vision_feature_layer
+            )
+
+            special_image_mask = (input_ids == self.config.video_token_index).unsqueeze(-1)
+            special_image_mask = special_image_mask.expand_as(inputs_embeds).to(inputs_embeds.device)
+            if not is_torchdynamo_compiling() and inputs_embeds[special_image_mask].numel() != video_features.numel():
+                n_video_tokens = (input_ids == self.config.video_token_index).sum()
+                n_video_features = video_features.shape[0] * video_features.shape[1]
+                raise ValueError(
+                    f"Video features and video tokens do not match: tokens: {n_video_tokens}, features {n_video_features}"
+                )
+            video_features = video_features.to(inputs_embeds.device, inputs_embeds.dtype)
+            inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, video_features)
 
         outputs = self.language_model(
             attention_mask=attention_mask,
@@ -209,14 +125,12 @@ def forward(
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
             cache_position=cache_position,
-            logits_to_keep=0,
+            logits_to_keep=logits_to_keep,
             token_idx=token_idx,
-            **kwargs,
+            **lm_kwargs,
         )
 
         logits = outputs[0]
-        if logits.shape[1] > 1:
-            logits = logits[:, self.feature_offset :, :]
 
         loss = None
         if labels is not None:
@@ -246,196 +160,6 @@ def forward(
             past_key_values=outputs.past_key_values,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
-            image_hidden_states=kwargs.get("image_features", None) if pixel_values_images is not None else None,
-            video_hidden_states=kwargs.get("video_features", None) if pixel_values_videos is not None else None,
-        )
-
-    def prepare_inputs_for_generation(
-        self,
-        input_ids,
-        past_key_values=None,
-        inputs_embeds=None,
-        pixel_values_images=None,
-        pixel_values_videos=None,
-        attention_mask=None,
-        cache_position=None,
-        logits_to_keep=None,
-        **kwargs,
-    ):
-        token_idx = kwargs.get("token_idx", None)
-        if token_idx is None:
-            return super().prepare_inputs_for_generation(
-                input_ids=input_ids,
-                past_key_values=past_key_values,
-                inputs_embeds=inputs_embeds,
-                pixel_values_images=pixel_values_images,
-                pixel_values_videos=pixel_values_videos,
-                attention_mask=attention_mask,
-                cache_position=cache_position,
-                logits_to_keep=logits_to_keep,
-                **kwargs,
-            )
-        # Else, we need to update token_idx when merging features from videos/images with input embeddings
-        labels = kwargs.get("labels", None)
-        if (input_ids is None) ^ (inputs_embeds is not None):
-            raise ValueError(
-                "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
-            )
-
-        if (pixel_values_images is not None or pixel_values_videos is not None) and inputs_embeds is not None:
-            raise ValueError(
-                "You cannot specify both pixel_values and inputs_embeds at the same time, and must specify either one"
-            )
-
-        legacy_processing = False
-        inputs_not_expanded = False
-        if input_ids is not None:
-            img_token_not_enough = (input_ids == self.config.image_token_index).sum(
-                1
-            ).max() < self.config.image_seq_length
-            video_token_not_enough = (input_ids == self.config.video_token_index).sum(
-                1
-            ).max() < self.config.video_seq_length
-            # if the number of image/video tokens is more than image embeddings seq length, then prob we expanded it in processing
-            # not very reliable, but we don't expect one to actually pass 500+ images for one prompt
-            inputs_not_expanded = (img_token_not_enough and pixel_values_images is not None) or (
-                video_token_not_enough and pixel_values_videos is not None
-            )
-        model_inputs = self.language_model.prepare_inputs_for_generation(
-            input_ids,
-            past_key_values=past_key_values,
-            inputs_embeds=inputs_embeds,
-            attention_mask=attention_mask,
-            cache_position=cache_position,
-            logits_to_keep=logits_to_keep,
-            **kwargs,
-        )
-        position_ids = model_inputs["position_ids"]
-        cache_position = model_inputs["cache_position"]
-        attention_mask = model_inputs["attention_mask"]
-        inputs_embeds = model_inputs.get("inputs_embeds", None)
-        input_ids = model_inputs.get("input_ids", None)
-
-        if inputs_embeds is None:
-            inputs_embeds = self.get_input_embeddings()(input_ids)
-            pixels_present = input_ids.shape[-1] == 1 and (
-                pixel_values_images is not None or pixel_values_videos is not None
-            )
-            legacy_processing = inputs_not_expanded or pixels_present
-
-        vision_feature_layer = kwargs.get("vision_feature_layer", None)
-        vision_feature_layer = (
-            vision_feature_layer if vision_feature_layer is not None else self.config.vision_feature_layer
-        )
-        vision_feature_select_strategy = kwargs.get("vision_feature_select_strategy", None)
-        vision_feature_select_strategy = (
-            vision_feature_select_strategy
-            if vision_feature_select_strategy is not None
-            else self.config.vision_feature_select_strategy
-        )
-        if pixel_values_images is not None or pixel_values_videos is not None:
-            image_outputs, video_outputs, num_frames = self._get_vision_features(
-                pixel_values_images=pixel_values_images,
-                pixel_values_videos=pixel_values_videos,
-                vision_feature_layer=vision_feature_layer,
-                vision_feature_select_strategy=vision_feature_select_strategy,
-            )
-
-            image_features = video_features = None
-            if image_outputs is not None:
-                image_features = self.multi_modal_projector(image_outputs)
-            if video_outputs is not None:
-                video_features = self.multi_modal_projector(video_outputs)
-
-            if legacy_processing:
-                logger.warning_once(
-                    "Expanding inputs for image tokens in Video-LLaVa should be done in processing. "
-                    "Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
-                    "with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
-                    "Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
-                )
-                if input_ids.shape[1] != 1:
-                    self.feature_offset = 0
-                    for features, frames in ((image_features, 1), (video_features, num_frames)):
-                        if features is not None:
-                            (
-                                inputs_embeds,
-                                attention_mask,
-                                labels,
-                                position_ids,
-                                input_ids,
-                            ) = self._merge_input_ids_with_visual_features(
-                                features,
-                                inputs_embeds,
-                                input_ids,
-                                attention_mask,
-                                labels,
-                                token_idx,
-                                num_frames=frames,
-                            )
-                    cache_position = torch.arange(attention_mask.shape[1], device=attention_mask.device)
-                else:
-                    # Retrieve the first layer to inspect the logits and mask out the hidden states
-                    # that are set to 0
-                    first_layer_past_key_value = past_key_values[0][0][:, :, :, 0]
-
-                    # Sum all dimensions of head_dim (-2) to avoid random errors such as: https://github.com/huggingface/transformers/pull/28032#issuecomment-1863691941
-                    batch_index, non_attended_tokens = torch.where(first_layer_past_key_value.float().sum(-2) == 0)
-
-                    target_length = input_ids.shape[1]
-                    past_length = first_layer_past_key_value.shape[-1]
-
-                    extended_attention_mask = torch.ones(
-                        (attention_mask.shape[0], past_length),
-                        dtype=attention_mask.dtype,
-                        device=attention_mask.device,
-                    )
-
-                    # Filter out only the tokens that can be un-attended, this can happen
-                    # if one uses Llava + Fused modules where the cache on the
-                    # first iteration is already big enough, or if one passes custom cache
-                    valid_indices = non_attended_tokens < extended_attention_mask.size(-1)
-                    new_batch_index = batch_index[valid_indices]
-                    new_non_attended_tokens = non_attended_tokens[valid_indices]
-
-                    # Zero-out the places where we don't need to attend
-                    extended_attention_mask[new_batch_index, new_non_attended_tokens] = 0
-                    new_token_idx = token_idx + self.feature_offset
-                    extended_attention_mask[:, new_token_idx - 1 + target_length :] = 0
-                    attention_mask = extended_attention_mask.clone()
-                    position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1
-                    cache_position = new_token_idx
-
-            # TODO: @raushan retain only the new behavior after v4.47
-            else:
-                if image_outputs is not None:
-                    special_image_mask = (
-                        (input_ids == self.config.image_token_index).unsqueeze(-1).expand_as(inputs_embeds)
-                    )
-                    image_features = image_features.to(inputs_embeds.device, inputs_embeds.dtype)
-                    inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_features)
-
-                if video_outputs is not None:
-                    special_image_mask = (
-                        (input_ids == self.config.video_token_index).unsqueeze(-1).expand_as(inputs_embeds)
-                    )
-                    video_features = video_features.to(inputs_embeds.device, inputs_embeds.dtype)
-                    inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, video_features)
-
-        model_inputs.update(
-            {
-                "position_ids": position_ids,
-                "cache_position": cache_position,
-                "attention_mask": attention_mask,
-                "token_idx": token_idx + self.feature_offset,
-                "inputs_embeds": inputs_embeds,
-            }
+            image_hidden_states=image_features if pixel_values_images is not None else None,
+            video_hidden_states=video_features if pixel_values_videos is not None else None,
         )
-        if legacy_processing or (cache_position is not None and cache_position[0]) == 0:
-            # If we're in cached decoding stage, pixel values should be None because input ids do not contain special image token anymore
-            # Otherwise we need pixel values to be passed to model
-            model_inputs["pixel_values_images"] = pixel_values_images
-            model_inputs["pixel_values_videos"] = pixel_values_videos
-            model_inputs["image_features"] = image_features
-            model_inputs["video_features"] = video_features
-        return model_inputs
diff --git a/optimum/habana/transformers/models/video_llava/processing_video_llava.py b/optimum/habana/transformers/models/video_llava/processing_video_llava.py
deleted file mode 100644
index 9ab480220c..0000000000
--- a/optimum/habana/transformers/models/video_llava/processing_video_llava.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from typing import List, Optional, Union
-
-from transformers.image_processing_utils import BatchFeature
-from transformers.image_utils import ImageInput, get_image_size, to_numpy_array
-from transformers.models.video_llava.processing_video_llava import VideoLlavaProcessor
-from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
-from transformers.utils import TensorType
-
-from optimum.utils import logging
-
-
-logger = logging.get_logger(__name__)
-
-
-class GaudiVideoLlavaProcessor(VideoLlavaProcessor):
-    attributes = ["image_processor", "tokenizer"]
-    valid_kwargs = ["chat_template", "patch_size", "vision_feature_select_strategy", "image_token", "video_token"]
-    image_processor_class = "VideoLlavaImageProcessor"
-    tokenizer_class = "AutoTokenizer"
-
-    def __init__(
-        self,
-        image_processor=None,
-        tokenizer=None,
-        patch_size=None,
-        vision_feature_select_strategy=None,
-        image_token="<image>",  # set the default and let users change if they have peculiar special tokens in rare cases
-        video_token="<video>",
-        chat_template=None,
-        **kwargs,
-    ):
-        r"""
-        Copied from https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/video_llava/processing_video_llava.py#L61
-        except move super().__init__ to the top of the function so that it will not change the default value for self.patch_size and self.vision_feature_select_strategy
-        """
-        super().__init__(image_processor, tokenizer, chat_template=chat_template)
-        self.patch_size = patch_size
-        self.vision_feature_select_strategy = vision_feature_select_strategy
-        self.image_token = image_token
-        self.video_token = video_token
-
-    def __call__(
-        self,
-        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
-        images: ImageInput = None,
-        videos: ImageInput = None,
-        padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = None,
-        max_length=None,
-        return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH,
-    ) -> BatchFeature:
-        r"""
-        Copied from https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/video_llava/processing_video_llava.py#L78
-        Here we use the processor logit in transformers 4.45.2 to avoid the performance regression for op `masked_scatter` in transformers 4.49 on Gaudi3
-        """
-        data = {}
-        if images is not None or videos is not None:
-            encoded_images = self.image_processor(images=images, videos=videos, return_tensors=return_tensors)
-            data.update(encoded_images)
-
-        if isinstance(text, str):
-            text = [text]
-        elif not isinstance(text, list) and not isinstance(text[0], str):
-            raise ValueError("Invalid input text. Please provide a string, or a list of strings")
-
-        prompt_strings = text
-        if encoded_images is not None and (self.patch_size is None or self.vision_feature_select_strategy is None):
-            logger.warning_once(
-                "Expanding inputs for image tokens in Video-LLaVa should be done in processing. "
-                "Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
-                "with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
-                "Using processors without these attributes in the config is deprecated and will throw an error in v4.44."
-            )
-        # Replace the image/video tokens with the expanded token sequence
-        elif encoded_images is not None:
-            if "pixel_values_images" in encoded_images.keys():
-                height, width = get_image_size(to_numpy_array(encoded_images.get("pixel_values_images")[0]))
-                num_frames = 1
-
-            if "pixel_values_videos" in encoded_images.keys():
-                one_video = to_numpy_array(encoded_images.get("pixel_values_videos")[0])
-                height, width = get_image_size(one_video[0])
-                num_frames = one_video.shape[0]  # frame dim is always after batch dim
-
-            num_image_tokens = (height // self.patch_size) * (width // self.patch_size) + 1
-            num_video_tokens = num_image_tokens * num_frames
-
-            num_image_tokens = (height // self.patch_size) * (width // self.patch_size) + 1
-            num_video_tokens = num_image_tokens * num_frames
-            if self.vision_feature_select_strategy == "default":
-                num_image_tokens -= 1
-
-            prompt_strings = []
-            for sample in text:
-                sample = sample.replace(self.image_token, self.image_token * num_image_tokens)
-                sample = sample.replace(self.video_token, self.video_token * num_video_tokens)
-                prompt_strings.append(sample)
-
-        text_inputs = self.tokenizer(
-            prompt_strings,
-            return_tensors=return_tensors,
-            padding=padding,
-            truncation=truncation,
-            max_length=max_length,
-        )
-        data.update(text_inputs)
-
-        return BatchFeature(data=data)
diff --git a/optimum/habana/utils/features.py b/optimum/habana/utils/features.py
index 6b9f501116..951ee1f56b 100644
--- a/optimum/habana/utils/features.py
+++ b/optimum/habana/utils/features.py
@@ -71,7 +71,7 @@ class IsSynapsePublicVersion(Feature):
     """
 
     def __init__(self):
-        super().__init__(SynapseVersionRange(">=1.20.0", "<1.21.0"))
+        super().__init__(SynapseVersionRange(">=1.22.0", "<1.23.0"))
 
 
 class IsSynapseUnreleasedVersion(Feature):
@@ -80,7 +80,7 @@ class IsSynapseUnreleasedVersion(Feature):
     """
 
     def __init__(self):
-        super().__init__(SynapseVersionRange(">=1.21.0"))
+        super().__init__(SynapseVersionRange(">=1.23.0"))
 
 
 @cache
diff --git a/optimum/habana/utils/misc.py b/optimum/habana/utils/misc.py
index 4239fabebc..30102a7bd1 100755
--- a/optimum/habana/utils/misc.py
+++ b/optimum/habana/utils/misc.py
@@ -32,7 +32,7 @@
 logger = logging.get_logger(__name__)
 
 
-CURRENTLY_VALIDATED_SYNAPSE_VERSION = version.parse("1.21.0")
+CURRENTLY_VALIDATED_SYNAPSE_VERSION = version.parse("1.22.0")
 
 
 def to_device_dtype(my_input: Any, target_device: torch.device = None, target_dtype: torch.dtype = None):
diff --git a/setup.py b/setup.py
index 9c414eff33..225b4e9536 100644
--- a/setup.py
+++ b/setup.py
@@ -36,6 +36,7 @@
     "diffusers >= 0.34.0, < 0.34.1",
     "huggingface_hub[hf_xet] >= 0.24.7",
     "sentence-transformers == 3.3.1",
+    "sentencepiece",
 ]
 
 TESTS_REQUIRE = [
@@ -44,7 +45,7 @@
     "GitPython",
     "optuna",
     "sentencepiece",
-    "datasets",
+    "datasets == 3.6.0",
     "timm",
     "safetensors",
     "pytest < 8.0.0",
@@ -52,6 +53,7 @@
     "torchsde",
     "timm",
     "peft",
+    "bitsandbytes",
     "tiktoken",
     "blobfile",
 ]
diff --git a/tests/baselines/fixture/tests/test_bnb_inference.json b/tests/baselines/fixture/tests/test_bnb_inference.json
deleted file mode 100644
index 2056f34d94..0000000000
--- a/tests/baselines/fixture/tests/test_bnb_inference.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-  "tests/test_bnb_inference.py::test_nf4_quantization_inference": {
-    "output": "Hello my name is Kelsey and I am a 16 year old girl who loves to draw and paint. I have"
-  }
-}
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_bnb_qlora.json b/tests/baselines/fixture/tests/test_bnb_qlora.json
index 6e448772de..2075261672 100644
--- a/tests/baselines/fixture/tests/test_bnb_qlora.json
+++ b/tests/baselines/fixture/tests/test_bnb_qlora.json
@@ -1,10 +1,50 @@
 {
-  "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning": {
+  "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning[True-meta-llama/Llama-3.2-1B-8-8]": {
     "gaudi2": {
       "eval_loss": 1.225
     },
     "gaudi3": {
       "eval_loss": 1.225
     }
+  },
+  "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning[False-meta-llama/Llama-3.2-1B-8-8]": {
+    "gaudi2": {
+      "eval_loss": 1.225
+    },
+    "gaudi3": {
+      "eval_loss": 1.225
+    }
+  },
+  "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning[True-meta-llama/Llama-3.1-8B-4-4]": {
+    "gaudi2": {
+      "eval_loss": 1.044
+    },
+    "gaudi3": {
+      "eval_loss": 1.044
+    }
+  },
+  "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning[False-meta-llama/Llama-3.1-8B-4-4]": {
+    "gaudi2": {
+      "eval_loss": 1.044
+    },
+    "gaudi3": {
+      "eval_loss": 1.044
+    }
+  },
+  "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning[True-meta-llama/Llama-3.1-70B-1-1]": {
+    "gaudi2": {
+      "eval_loss": 0.961
+    },
+    "gaudi3": {
+      "eval_loss": 0.961
+    }
+  },
+  "tests/test_bnb_qlora.py::test_nf4_quantization_finetuning[False-meta-llama/Llama-3.1-70B-1-1]": {
+    "gaudi2": {
+      "eval_loss": 0.961
+    },
+    "gaudi3": {
+      "eval_loss": 0.961
+    }
   }
 }
\ No newline at end of file
diff --git a/tests/baselines/fixture/tests/test_text_generation_example.json b/tests/baselines/fixture/tests/test_text_generation_example.json
index 0e7b7c6d65..67a0343d97 100644
--- a/tests/baselines/fixture/tests/test_text_generation_example.json
+++ b/tests/baselines/fixture/tests/test_text_generation_example.json
@@ -688,5 +688,17 @@
       "output": "DeepSpeed is a machine learning framework that enables the training of large-scale models with reduced computational resources. It achieves this by using a technique called model parallelism, which allows the model to be split across multiple GPUs. This makes it possible to train models that are too large to fit on a single GPU.\n## What is DeepSpeed?\nDeepSpeed is a machine learning framework that enables the training of large-scale models with reduced computational resources. It achieves this by using a technique called model parallelism, which allows the model to be split across multiple GPUs",
       "throughput": 94.70370546821054
     }
+  },
+  "tests/test_text_generation_example.py::test_text_generation_bnb[unsloth/Meta-Llama-3.1-70B-bnb-4bit-1-20-False-True]": {
+    "gaudi2": {
+      "output": "DeepSpeed is a machine learning framework that makes distributed training easy, efficient, and flexible. DeepSpeed can train BERT-Large on",
+      "throughput": 0.7572952
+    }
+  },
+  "tests/test_text_generation_example.py::test_text_generation_bnb[meta-llama/Llama-3.1-70B-1-20-True-True]": {
+    "gaudi2": {
+      "output": "DeepSpeed is a machine learning framework that makes distributed training easy, efficient, and effective. It is a deep learning optimization library that makes",
+      "throughput": 0.7583387
+    }
   }
 }
\ No newline at end of file
diff --git a/tests/example_diff/run_audio_classification.txt b/tests/example_diff/run_audio_classification.txt
index b0055ed170..eeff0a8548 100644
--- a/tests/example_diff/run_audio_classification.txt
+++ b/tests/example_diff/run_audio_classification.txt
@@ -36,7 +36,7 @@
 ---
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 173,175d175
 <     freeze_feature_extractor: Optional[bool] = field(
 <         default=None, metadata={"help": "Whether to freeze the feature extractor layers of the model."}
diff --git a/tests/example_diff/run_clip.txt b/tests/example_diff/run_clip.txt
index be2fe6baca..5773397c09 100644
--- a/tests/example_diff/run_clip.txt
+++ b/tests/example_diff/run_clip.txt
@@ -31,7 +31,7 @@
 ---
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 176a186,188
 >     mediapipe_dataloader: bool = field(
 >         default=False, metadata={"help": "Turn on MediaPipe hardware-based accelerated data loading."}
diff --git a/tests/example_diff/run_clm.txt b/tests/example_diff/run_clm.txt
index 8f50b571ba..3cdaaacc2c 100644
--- a/tests/example_diff/run_clm.txt
+++ b/tests/example_diff/run_clm.txt
@@ -40,7 +40,7 @@
 62a64,69
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 > 
 > require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 > 
diff --git a/tests/example_diff/run_glue.txt b/tests/example_diff/run_glue.txt
index a332e54bac..cff930246a 100644
--- a/tests/example_diff/run_glue.txt
+++ b/tests/example_diff/run_glue.txt
@@ -30,7 +30,7 @@
 > 
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 66,67d76
 < logger = logging.getLogger(__name__)
 < 
diff --git a/tests/example_diff/run_image_classification.txt b/tests/example_diff/run_image_classification.txt
index e42e130c2b..b1148c2c90 100644
--- a/tests/example_diff/run_image_classification.txt
+++ b/tests/example_diff/run_image_classification.txt
@@ -31,7 +31,7 @@
 ---
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 184c193
 <     parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
 ---
diff --git a/tests/example_diff/run_mlm.txt b/tests/example_diff/run_mlm.txt
index 5ae5abda17..86169b33ad 100644
--- a/tests/example_diff/run_mlm.txt
+++ b/tests/example_diff/run_mlm.txt
@@ -37,7 +37,7 @@
 > 
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 > 
 > require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 > 
diff --git a/tests/example_diff/run_qa.txt b/tests/example_diff/run_qa.txt
index 9293643ea3..a89044e356 100644
--- a/tests/example_diff/run_qa.txt
+++ b/tests/example_diff/run_qa.txt
@@ -34,7 +34,7 @@
 57a62,67
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 > 
 > require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 > 
diff --git a/tests/example_diff/run_seq2seq_qa.txt b/tests/example_diff/run_seq2seq_qa.txt
index 73e5f5777c..08015ee735 100644
--- a/tests/example_diff/run_seq2seq_qa.txt
+++ b/tests/example_diff/run_seq2seq_qa.txt
@@ -31,7 +31,7 @@
 53a58,63
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 > 
 > require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
 > 
diff --git a/tests/example_diff/run_speech_recognition_ctc.txt b/tests/example_diff/run_speech_recognition_ctc.txt
index bb62540d08..8c214c6b36 100644
--- a/tests/example_diff/run_speech_recognition_ctc.txt
+++ b/tests/example_diff/run_speech_recognition_ctc.txt
@@ -32,7 +32,7 @@
 58a61,66
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 > 
 > require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
 > 
diff --git a/tests/example_diff/run_speech_recognition_seq2seq.txt b/tests/example_diff/run_speech_recognition_seq2seq.txt
index cd35ec0718..23664d44dc 100644
--- a/tests/example_diff/run_speech_recognition_seq2seq.txt
+++ b/tests/example_diff/run_speech_recognition_seq2seq.txt
@@ -29,7 +29,7 @@
 < check_min_version("4.52.0.dev0")
 ---
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 112c121
 <     forced_decoder_ids: list[list[int]] = field(
 ---
diff --git a/tests/example_diff/run_summarization.txt b/tests/example_diff/run_summarization.txt
index caf7d53b6a..f444536da3 100644
--- a/tests/example_diff/run_summarization.txt
+++ b/tests/example_diff/run_summarization.txt
@@ -50,7 +50,7 @@
 ---
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 > 
 > require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
 > 
diff --git a/tests/example_diff/run_translation.txt b/tests/example_diff/run_translation.txt
index 1f7c845aa4..f4a940d239 100644
--- a/tests/example_diff/run_translation.txt
+++ b/tests/example_diff/run_translation.txt
@@ -31,7 +31,7 @@
 59a64,69
 > # Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
 > check_min_version("4.51.0")
-> check_optimum_habana_min_version("1.18.0.dev0")
+> check_optimum_habana_min_version("1.19.0.dev0")
 > 
 > require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
 > 
diff --git a/tests/test_bnb_inference.py b/tests/test_bnb_inference.py
deleted file mode 100644
index 8898e3bd32..0000000000
--- a/tests/test_bnb_inference.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# coding=utf-8
-# Copyright 2022 the HuggingFace Inc. team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import copy
-import os
-
-import pytest
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-
-from .utils import OH_DEVICE_CONTEXT
-
-
-MODEL_ID = "meta-llama/Llama-3.2-1B"
-
-
-def get_model(token: str):
-    nf4_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.bfloat16,
-    )
-
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID, quantization_config=nf4_config, device_map={"": "hpu"}, torch_dtype=torch.bfloat16, token=token.value
-    )
-
-    return model
-
-
-@pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="execution not supported on gaudi1")
-def test_nf4_quantization_inference(token: str, baseline):
-    os.environ["PT_HPU_LAZY_MODE"] = "0"
-    from optimum.habana.transformers import modeling_utils
-
-    modeling_utils.adapt_transformers_to_gaudi()
-
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token.value)
-
-    model = get_model(token)
-
-    generation_config = copy.deepcopy(model.generation_config)
-    generation_config.max_new_tokens = 20
-    generation_config.use_cache = True
-    generation_config.use_flash_attention = True
-
-    input_text = "Hello my name is"
-    inputs = tokenizer(input_text, return_tensors="pt").to(device="hpu")
-
-    torch.manual_seed(42)
-    outputs = model.generate(**inputs, generation_config=generation_config, lazy_mode=False)
-    decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    baseline.assertEqual(output=decoded_output)
diff --git a/tests/test_bnb_qlora.py b/tests/test_bnb_qlora.py
index 610645939d..3ca1da4348 100644
--- a/tests/test_bnb_qlora.py
+++ b/tests/test_bnb_qlora.py
@@ -26,9 +26,6 @@
 from .utils import OH_DEVICE_CONTEXT
 
 
-MODEL_ID = "meta-llama/Llama-3.2-1B"
-
-
 def print_model_size(model):
     """
     Prints the model size in GB.
@@ -59,12 +56,12 @@ def get_data(tokenizer, dataset_name, max_seq_length=1024):
     data = dataset.map(
         lambda example: tokenizer(example["text"], max_length=max_seq_length, padding="max_length"), batched=True
     )
-    split_data = data["train"].train_test_split(test_size=0.1, seed=42)
+    split_data = data["train"].train_test_split(test_size=0.01, seed=42)
 
     return split_data
 
 
-def get_model(token: str):
+def get_model(token: str, model_id: str):
     nf4_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
@@ -72,25 +69,42 @@ def get_model(token: str):
     )
 
     model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID, quantization_config=nf4_config, device_map={"": "hpu"}, torch_dtype=torch.bfloat16, token=token.value
+        model_id, quantization_config=nf4_config, device_map={"": "hpu"}, torch_dtype=torch.bfloat16, token=token.value
     )
 
     return model
 
 
+modeldata = [
+    ("meta-llama/Llama-3.2-1B", 8, 8),
+    ("meta-llama/Llama-3.1-8B", 4, 4),
+    ("meta-llama/Llama-3.1-70B", 1, 1),
+]
+
+
 @pytest.mark.skipif("gaudi1" == OH_DEVICE_CONTEXT, reason="execution not supported on gaudi1")
-def test_nf4_quantization_finetuning(token: str, baseline):
+@pytest.mark.parametrize("model_id, train_bs, eval_bs", modeldata)
+@pytest.mark.parametrize("compile_on", [True, False])
+def test_nf4_quantization_finetuning(
+    token: str, baseline, model_id: str, train_bs: int, eval_bs: int, compile_on: bool
+):
     os.environ["PT_HPU_LAZY_MODE"] = "0"
     from optimum.habana.transformers import modeling_utils
 
     modeling_utils.adapt_transformers_to_gaudi()
 
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token.value, padding_side="right")
+    tokenizer = AutoTokenizer.from_pretrained(model_id, token=token.value, padding_side="right")
     # needed for llama tokenizer
     tokenizer.pad_token = tokenizer.eos_token
 
-    model = get_model(token)
+    model = get_model(token, model_id)
     model.gradient_checkpointing_enable()
+    model.generation_config.use_flash_attention = True
+    model.generation_config.flash_attention_recompute = False
+    model.generation_config.flash_attention_causal_mask = True
+    model.generation_config.attn_softmax_bf16 = True
+    model.generation_config.use_fused_rope = True
+    is_large_model = "70B" in model_id
     print_model_size(model)
 
     model = prepare_model_for_kbit_training(model)
@@ -118,9 +132,9 @@ def test_nf4_quantization_finetuning(token: str, baseline):
     )
 
     training_args = GaudiTrainingArguments(
-        eval_strategy="steps",
-        per_device_train_batch_size=8,
-        per_device_eval_batch_size=8,
+        eval_strategy="no",
+        per_device_train_batch_size=train_bs,
+        per_device_eval_batch_size=eval_bs,
         gradient_accumulation_steps=2,
         max_steps=50,
         eval_steps=10,
@@ -131,9 +145,12 @@ def test_nf4_quantization_finetuning(token: str, baseline):
         lr_scheduler_type="linear",
         use_habana=True,
         use_lazy_mode=False,
-        pipelining_fwd_bwd=True,
         adjust_throughput=True,
-        throughput_warmup_steps=2,
+        throughput_warmup_steps=3,
+        torch_compile=compile_on,
+        torch_compile_backend="hpu_backend" if compile_on else None,
+        gradient_checkpointing=is_large_model,
+        use_regional_compilation=compile_on and is_large_model,
     )
 
     trainer = GaudiTrainer(
@@ -149,7 +166,7 @@ def test_nf4_quantization_finetuning(token: str, baseline):
     trainer.train()
 
     baseline.assertRef(
-        compare=lambda actual, ref: abs(actual - ref) < 5e2,
+        compare=lambda actual, ref: abs(actual - ref) < 5e-2,
         context=[OH_DEVICE_CONTEXT],
         eval_loss=trainer.evaluate()["eval_loss"],
     )
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 7c27135d44..b44f2c198d 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -447,10 +447,13 @@ def test(self):
                         self.assertGreaterEqual(results["accuracy"], baseline)
                 return
             elif self.EXAMPLE_NAME == "run_clip":
-                if os.environ.get("DATA_CACHE", "") == "":
+                coco_config = self._load_dataset_config().get("coco", {})
+
+                if not coco_config.get("dataset_dir"):
                     from .clip_coco_utils import COCO_URLS, download_files
 
                     download_files(COCO_URLS)
+
                 from .clip_coco_utils import create_clip_roberta_model
 
                 create_clip_roberta_model()
@@ -512,8 +515,8 @@ def test(self):
                 env_variables["PT_HPU_LAZY_MODE"] = "0"
                 if "--use_hpu_graphs_for_inference" in extra_command_line_arguments:
                     extra_command_line_arguments.remove("--use_hpu_graphs_for_inference")
-            if os.environ.get("DATA_CACHE", "") != "" and self.EXAMPLE_NAME == "run_clip":
-                extra_command_line_arguments[0] = "--data_dir {}".format(os.environ["DATA_CACHE"])
+
+            extra_command_line_arguments += self._get_dataset_args()
 
             if torch_compile and (
                 model_name == "bert-large-uncased-whole-word-masking"
@@ -784,6 +787,95 @@ def check(actual, ref):
 
         assert passed, "One or more metrics failed"
 
+    def _load_dataset_config(self) -> dict:
+        """
+        Parse the optional `DATASET_CONFIG` environment variable.
+
+        Returns:
+            The decoded JSON object, or an empty dict when the variable is unset or empty.
+
+        Raises:
+            RuntimeError: if the value is not valid JSON or does not decode to a JSON object.
+        """
+        config_str = os.environ.get("DATASET_CONFIG")
+        if not config_str:
+            return {}
+        try:
+            config = json.loads(config_str)
+        except json.JSONDecodeError as e:
+            raise RuntimeError("Invalid JSON in DATASET_CONFIG") from e
+        # Callers use `.get(...)` on the result, so reject any JSON value that is not an object.
+        if not isinstance(config, dict):
+            raise RuntimeError("DATASET_CONFIG must be a JSON object")
+        return config
+
+    def _get_dataset_args(self) -> List[str]:
+        """
+        Build the extra command-line arguments that point this example at a configured dataset.
+
+        Returns:
+            An empty list when `DATASET_CONFIG` is unset or `EXAMPLE_NAME` has no dataset mapping,
+            otherwise the arguments built by the handler for the mapped dataset.
+
+        Raises:
+            RuntimeError: if `DATASET_CONFIG` is set but has no entry for the mapped dataset.
+        """
+        dataset_config = self._load_dataset_config()
+        if not dataset_config:
+            return []
+
+        example_paths = {
+            "run_clip": "coco",
+            "run_speech_recognition_ctc": "libri",
+        }
+
+        dataset_key = example_paths.get(self.EXAMPLE_NAME)
+        if dataset_key is None:
+            return []
+
+        dataset_info = dataset_config.get(dataset_key)
+        if not dataset_info:
+            raise RuntimeError(f"No dataset_info found for EXAMPLE_NAME: {self.EXAMPLE_NAME}")
+
+        handler_map = {
+            "coco": self._get_clip_dataset_args,
+            "libri": self._get_speech_dataset_args,
+        }
+
+        return handler_map[dataset_key](dataset_info)
+
+    def _get_clip_dataset_args(self, dataset_info: dict) -> List[str]:
+        """
+        Build the `--data_dir` argument from the `dataset_dir` entry of `dataset_info`.
+
+        Raises:
+            RuntimeError: if `dataset_dir` is missing from `dataset_info`.
+        """
+        try:
+            return [f"--data_dir {dataset_info['dataset_dir']}"]
+        except KeyError as e:
+            # Chain explicitly so the original KeyError is reported as the cause.
+            raise RuntimeError(f"Missing key in dataset_info: {e}") from e
+
+    def _get_speech_dataset_args(self, dataset_info: dict) -> List[str]:
+        """
+        Build the `--dataset_name` and `--dataset_dir` arguments from `dataset_info`.
+
+        Both paths are joined onto `dataset_dir`, using the `dataset_script` and `dataset_data` entries.
+
+        Raises:
+            RuntimeError: if any of the three keys is missing from `dataset_info`.
+        """
+        try:
+            base_dir = dataset_info["dataset_dir"]
+            return [
+                f"--dataset_name {os.path.join(base_dir, dataset_info['dataset_script'])}",
+                f"--dataset_dir {os.path.join(base_dir, dataset_info['dataset_data'])}",
+            ]
+        except KeyError as e:
+            # Chain explicitly so the original KeyError is reported as the cause.
+            raise RuntimeError(f"Missing key in dataset_info: {e}") from e
+
 
 class TextClassificationExampleTester(ExampleTesterBase, metaclass=ExampleTestMeta, example_name="run_glue"):
     TASK_NAME = "mrpc"
@@ -867,7 +920,6 @@ class MultiCardSpeechRecognitionExampleTester(
     ExampleTesterBase, metaclass=ExampleTestMeta, example_name="run_speech_recognition_ctc", multi_card=True
 ):
     TASK_NAME = "regisss/librispeech_asr_for_optimum_habana_ci"
-    DATASET_NAME = os.environ.get("DATA_CACHE")
 
 
 class MultiCardSummarizationExampleTester(
diff --git a/tests/test_image_to_text_example.py b/tests/test_image_to_text_example.py
index de805dffd2..f58e097d33 100644
--- a/tests/test_image_to_text_example.py
+++ b/tests/test_image_to_text_example.py
@@ -32,8 +32,8 @@
         "fp8": [
             # ("llava-hf/llava-1.5-7b-hf", 1),
             # ("llava-hf/llava-1.5-13b-hf", 1),
-            ("llava-hf/llava-v1.6-mistral-7b-hf", 1),
-            ("llava-hf/llava-v1.6-vicuna-7b-hf", 1),
+            pytest.param("llava-hf/llava-v1.6-mistral-7b-hf", 1, marks=pytest.mark.x2),
+            pytest.param("llava-hf/llava-v1.6-vicuna-7b-hf", 1, marks=pytest.mark.x2),
             pytest.param("llava-hf/llava-v1.6-vicuna-13b-hf", 1, marks=pytest.mark.x8),
         ],
     }
diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py
index b058506a85..3bb78d34d6 100644
--- a/tests/test_text_generation_example.py
+++ b/tests/test_text_generation_example.py
@@ -92,6 +92,10 @@
         "load_quantized_model_with_autoawq": [
             ("TheBloke/Llama-2-7b-Chat-AWQ", 1, 10, False, 128, 2048),
         ],
+        "run_model_with_bnb": [
+            ("unsloth/Meta-Llama-3.1-70B-bnb-4bit", 1, 20, False, True),
+            ("meta-llama/Llama-3.1-70B", 1, 20, True, True),
+        ],
         "deepspeed": [
             pytest.param("bigscience/bloomz", 8, 1, marks=pytest.mark.x8),
             # pytest.param("meta-llama/Llama-2-70b-hf", 8, 1, marks=pytest.mark.x8),
@@ -143,6 +147,7 @@
         "fp8": [],
         "load_quantized_model_with_autogptq": [],
         "load_quantized_model_with_autoawq": [],
+        "run_model_with_bnb": [],
         "deepspeed": [
             ("bigscience/bloomz-7b1", 8, 1),
         ],
@@ -168,6 +173,7 @@ def _test_text_generation(
     fp8: bool = False,
     load_quantized_model_with_autogptq: bool = False,
     load_quantized_model_with_autoawq: bool = False,
+    quantize_with_bnb: bool = False,
     max_input_tokens: int = 0,
     max_output_tokens: int = 100,
     parallel_strategy: str = None,
@@ -175,6 +181,7 @@ def _test_text_generation(
     num_beams: int = 1,
     num_return_sequences: int = 1,
     check_output: bool = False,
+    regional_compile: bool = False,
 ):
     command = ["python3"]
     path_to_example_dir = Path(__file__).resolve().parent.parent / "examples"
@@ -239,6 +246,8 @@ def _test_text_generation(
         if parallel_strategy == "tp":
             command += ["--use_flash_attention"]
             command += ["--flash_attention_recompute"]
+        if regional_compile:
+            command += ["--regional_compile"]
         env_variables["PT_ENABLE_INT64_SUPPORT"] = "1"
         env_variables["PT_HPU_LAZY_MODE"] = "0"
     else:
@@ -307,6 +316,8 @@ def _test_text_generation(
         command += ["--load_quantized_model_with_autogptq"]
     if load_quantized_model_with_autoawq:
         command += ["--load_quantized_model_with_autoawq"]
+    if quantize_with_bnb:
+        command += ["--quantize_with_bnb"]
     if parallel_strategy is not None:
         command += [
             f"--parallel_strategy={parallel_strategy}",
@@ -499,6 +510,33 @@ def test_text_generation_awq(
     )
 
 
+@pytest.mark.skipif(condition=bool("gaudi1" == OH_DEVICE_CONTEXT), reason=f"Skipping test for {OH_DEVICE_CONTEXT}")
+@pytest.mark.parametrize(
+    "model_name, world_size, output_len, quantize_with_bnb, check_output",
+    MODELS_TO_TEST["run_model_with_bnb"],
+)
+def test_text_generation_bnb(
+    model_name: str,
+    world_size: int,
+    output_len: int,
+    quantize_with_bnb: bool,
+    check_output: bool,
+    baseline,
+    token,
+):
+    _test_text_generation(
+        model_name,
+        baseline,
+        token,
+        world_size=world_size,
+        torch_compile=True,
+        regional_compile=True,
+        quantize_with_bnb=quantize_with_bnb,
+        max_output_tokens=output_len,
+        check_output=check_output,
+    )
+
+
 @pytest.mark.parametrize("model_name, world_size, batch_size", MODELS_TO_TEST["deepspeed"])
 def test_text_generation_deepspeed(model_name: str, world_size: int, batch_size: int, baseline, token):
     _test_text_generation(model_name, baseline, token, deepspeed=True, world_size=world_size, batch_size=batch_size)