diff --git a/.github/workflows/fast_tests.yml b/.github/workflows/fast_tests.yml
index b47decc095..7a0f7f0117 100644
--- a/.github/workflows/fast_tests.yml
+++ b/.github/workflows/fast_tests.yml
@@ -18,7 +18,7 @@ jobs:
group: itac-bm-emr-gaudi3-dell-2gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -43,7 +43,7 @@ jobs:
group: itac-bm-emr-gaudi3-dell-1gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
diff --git a/.github/workflows/slow_tests_gaudi2.yml b/.github/workflows/slow_tests_gaudi2.yml
index 268a4c0e28..aed176a08d 100644
--- a/.github/workflows/slow_tests_gaudi2.yml
+++ b/.github/workflows/slow_tests_gaudi2.yml
@@ -17,7 +17,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -30,7 +30,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
/bin/bash tests/ci/example_diff_tests.sh
stable-diffusion:
name: Test Stable Diffusion
@@ -43,7 +43,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -59,7 +59,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
/bin/bash tests/ci/slow_tests_diffusers.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
deepspeed:
name: Test DeepSpeed models
@@ -72,7 +72,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -88,7 +88,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
/bin/bash tests/ci/slow_tests_deepspeed.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
fsdp:
name: Test FSDP models
@@ -101,7 +101,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -117,7 +117,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
make slow_tests_fsdp TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
multi-card:
name: Test multi-card models
@@ -130,7 +130,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -146,7 +146,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
/bin/bash tests/ci/slow_tests_8x.sh ${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
single-card:
name: Test single-card models
@@ -160,7 +160,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -177,7 +177,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
/bin/bash tests/ci/slow_tests_1x.sh
text-generation:
name: Test text-generation example
@@ -192,7 +192,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -208,7 +208,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
trl:
name: Test TRL integration
@@ -221,7 +221,7 @@ jobs:
uses: actions/checkout@v2
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -237,7 +237,7 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
/bin/bash tests/ci/slow_tests_trl.sh
sentence-transformers:
name: Test Sentence Transformers integration
@@ -258,7 +258,7 @@ jobs:
path: sentence-transformers
- name: Pull image
run: |
- docker pull vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ docker pull vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
- name: Run tests
run: |
docker run \
@@ -274,5 +274,5 @@ jobs:
--cap-add=sys_nice \
--net=host \
--ipc=host \
- vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
+ vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest \
/bin/bash optimum-habana/tests/ci/sentence_transformers.sh
diff --git a/.github/workflows/slow_tests_gaudi3.yml b/.github/workflows/slow_tests_gaudi3.yml
index 03960d70af..6f9b3e699f 100644
--- a/.github/workflows/slow_tests_gaudi3.yml
+++ b/.github/workflows/slow_tests_gaudi3.yml
@@ -14,7 +14,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-1gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -37,7 +37,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-8gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -60,7 +60,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-8gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -83,7 +83,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-8gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -106,7 +106,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-8gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -130,7 +130,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-1gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -155,7 +155,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-8gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -178,7 +178,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-1gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
@@ -201,7 +201,7 @@ jobs:
runs-on:
group: itac-bm-emr-gaudi3-dell-1gaudi
container:
- image: docker://vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+ image: docker://vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
options: --workdir=/root/workspace --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
env:
OMPI_MCA_btl_vader_single_copy_mechanism: none
diff --git a/Makefile b/Makefile
index 433ce83fec..e03219dbcb 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
export PT_HPU_LAZY_MODE=1
# will be removed when lazy is disabled
-.PHONY: style test
+.PHONY: style test install_deepspeed
# Run code quality checks
style_check: clean
@@ -96,13 +96,12 @@ slow_tests_1x: test_installs
# Run multi-card non-regression tests
slow_tests_8x: test_installs
@status1=0; status2=0; \
- DATA_CACHE=$(DATA_CACHE) python -m pytest tests/test_examples.py -v -s -k "multi_card" || status1=$$?; \
+ DATASET_CONFIG='$(DATASET_CONFIG)' python -m pytest tests/test_examples.py -v -s -k "multi_card" || status1=$$?; \
python -m pytest tests/test_habana_profiler_integration.py -v -s -m x8 || status2=$$?; \
exit $$((status1 + status2))
# Run DeepSpeed non-regression tests
-slow_tests_deepspeed: test_installs
- python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+slow_tests_deepspeed: test_installs install_deepspeed
python -m pytest tests/test_examples.py -v -s -k "deepspeed"
slow_tests_diffusers: test_installs
@@ -113,10 +112,9 @@ slow_tests_sentence_transformers: test_installs
python -m pytest tests/test_sentence_transformers.py -v -s
# Run all text-generation non-regression tests
-slow_tests_text_generation_example: test_installs
+slow_tests_text_generation_example: test_installs install_deepspeed
python -m pip install -r examples/text-generation/requirements_awq.txt
BUILD_CUDA_EXT=0 python -m pip install -vvv --no-build-isolation git+https://github.com/HabanaAI/AutoGPTQ.git
- python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
python -m pip install tiktoken blobfile
python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder.py -v -s --token $(TOKEN)
@@ -127,18 +125,15 @@ slow_tests_text_generation_example_1x: test_installs
python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder.py -m "(not x2) and (not x4) and (not x8)" -v -s --token $(TOKEN)
# Run subset of text-generation non-regression tests that require 2 Gaudi cards
-slow_tests_text_generation_example_2x: test_installs
- python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+slow_tests_text_generation_example_2x: test_installs install_deepspeed
python -m pytest tests/test_text_generation_example.py -m x2 -v -s --token $(TOKEN)
# Run subset of text-generation non-regression tests that require 4 Gaudi cards
-slow_tests_text_generation_example_4x: test_installs
- python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+slow_tests_text_generation_example_4x: test_installs install_deepspeed
python -m pytest tests/test_text_generation_example.py -m x4 -v -s --token $(TOKEN)
# Run subset of text-generation non-regression tests that require 8 Gaudi cards
-slow_tests_text_generation_example_8x: test_installs
- python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.21.0
+slow_tests_text_generation_example_8x: test_installs install_deepspeed
python -m pytest tests/test_text_generation_example.py -m x8 -v -s --token $(TOKEN)
# Run image-to-text non-regression tests
@@ -146,7 +141,10 @@ slow_tests_image_to_text_example: test_installs
python -m pytest tests/test_image_to_text_example.py -v -s --token $(TOKEN)
slow_tests_image_to_text_example_1x: test_installs
- python -m pytest tests/test_image_to_text_example.py -m "not x8" -v -s --token $(TOKEN)
+ python -m pytest tests/test_image_to_text_example.py -m "(not x2) and (not x8)" -v -s --token $(TOKEN)
+
+slow_tests_image_to_text_example_2x: test_installs
+ python -m pytest tests/test_image_to_text_example.py -m x2 -v -s --token $(TOKEN)
slow_tests_image_to_text_example_8x: test_installs
python -m pytest tests/test_image_to_text_example.py -m x8 -v -s --token $(TOKEN)
@@ -225,3 +223,6 @@ clean:
test_installs:
python -m pip install .[tests]
+
+install_deepspeed: # DeepSpeed fork tag must match the Gaudi software stack used by the CI images
+	python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.22.0
diff --git a/README.md b/README.md
index 95c4f2d20d..e9b1a96f4a 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ Please refer to the Intel Gaudi AI Accelerator official [installation guide](htt
> Tests should be run in a Docker container based on Intel Gaudi's official images. Instructions to
> obtain the latest containers from the Intel Gaudi Vault are available
> [here](https://docs.habana.ai/en/latest/Installation_Guide/Additional_Installation/Docker_Installation.html#use-intel-gaudi-containers).
-> The current Optimum for Intel Gaudi has been validated with Intel Gaudi v1.21 stack.
+> The current Optimum for Intel Gaudi has been validated with Intel Gaudi v1.22 stack.
## Install the library and get example scripts
@@ -65,9 +65,9 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is up
To use the example associated with the latest stable release, run:
```bash
git clone https://github.com/huggingface/optimum-habana
-cd optimum-habana && git checkout v1.18.0
+cd optimum-habana && git checkout v1.19.0
```
-with `v1.18.0` being the latest Optimum for Intel Gaudi release version.
+with `v1.19.0` being the latest Optimum for Intel Gaudi release version.
### Option 2: Use the latest main branch under development
@@ -284,7 +284,8 @@ The following model architectures, tasks and device distributions have been vali
| ChatGLM |
DeepSpeed | Single card | [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation) |
| Qwen2-VL | | Single card | [image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text) |
| VideoLLaVA | | Single card | [Video comprehension](https://github.com/huggingface/optimum-habana/tree/main/examples/video-comprehension) |
-| GLM-4V | | Single card | [image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)
+| GLM-4V | | Single card | [image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text) |
+| Arctic | | DeepSpeed | [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation) |
diff --git a/conftest.py b/conftest.py
index b05956913c..e6dc2b57ff 100644
--- a/conftest.py
+++ b/conftest.py
@@ -3,6 +3,7 @@
import operator
import os
import sys
+import time
from pathlib import Path
import pytest
@@ -113,6 +114,14 @@ def token(request):
def pytest_configure(config):
+    junitxml_path = config.getoption("xmlpath", None)  # "xmlpath" is the dest of --junitxml; "junitxml" is not a registered name and always yields None
+    junitxml_global_dir = os.getenv("JUNITXML_DIR", None)
+
+    if not junitxml_path and junitxml_global_dir:  # only fall back to JUNITXML_DIR when no explicit --junitxml was given
+        timestamp = time.strftime("%Y%m%d%H%M%S")
+        os.makedirs(junitxml_global_dir, exist_ok=True)
+        config.option.xmlpath = os.path.join(junitxml_global_dir, f"result_{timestamp}.xml")
+
# Bitsandbytes installation for {test_bnb_qlora.py test_bnb_inference.py} tests
# This change will be reverted shortly
bnb_tests = any("bnb" in name for name in config.known_args_namespace.file_or_dir)
@@ -126,7 +135,7 @@ def pytest_configure(config):
"-m",
"pip",
"install",
- "git+https://github.com/bitsandbytes-foundation/bitsandbytes.git@multi-backend-refactor",
+ "git+https://github.com/bitsandbytes-foundation/bitsandbytes.git@main",
]
)
name = ""
diff --git a/docs/Dockerfile b/docs/Dockerfile
index 038c41100b..51c23063b7 100644
--- a/docs/Dockerfile
+++ b/docs/Dockerfile
@@ -1,4 +1,4 @@
-FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+FROM vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
ARG commit_sha
ARG clone_url
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
index 7c0246dc0f..4aba07b70d 100644
--- a/docs/source/index.mdx
+++ b/docs/source/index.mdx
@@ -113,6 +113,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
| ChatGLM | DeepSpeed | Single card | [language modeling](https://github.com/huggingface/optimum-habana/tree/main/examples/language-modeling)[text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation) |
| Qwen2-VL | | Single card | [image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text) |
| GLM-4V | | Single card | [image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text) |
+| Arctic | | DeepSpeed | [text generation](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation) |
- Diffusers
diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index d077f21939..1c3ec01d3a 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -32,10 +32,10 @@ platform for deep learning and follow the steps to start and connect to the node
## Docker Setup
Now that you have access to the node, you will use the latest Intel Gaudi AI Accelerator docker image by executing the docker run command which will
-automatically download and run the docker. At the time of writing this guide, latest Gaudi docker version was 1.21.0:
+automatically download and run the docker. At the time of writing this guide, the latest Gaudi docker version was 1.22.0:
```bash
-release=1.21.0
+release=1.22.0
os=ubuntu22.04
torch=2.6.0
docker_image=vault.habana.ai/gaudi-docker/$release/$os/habanalabs/pytorch-installer-$torch:latest
@@ -65,11 +65,11 @@ docker run -itd \
## Optimum for Intel Gaudi Setup
Check latest release of Optimum for Intel Gaudi [here](https://github.com/huggingface/optimum-habana/releases).
-At the time of writing this guide, latest Optimum for Intel Gaudi release version was v1.18.0, which is paired with Intel Gaudi Software release
-version 1.21.0. Install Optimum for Intel Gaudi as follows:
+At the time of writing this guide, the latest Optimum for Intel Gaudi release version was v1.19.0, which is paired with Intel Gaudi Software release
+version 1.22.0. Install Optimum for Intel Gaudi as follows:
```bash
-git clone -b v1.18.0 https://github.com/huggingface/optimum-habana
+git clone -b v1.19.0 https://github.com/huggingface/optimum-habana
pip install ./optimum-habana
```
diff --git a/docs/source/usage_guides/deepspeed.mdx b/docs/source/usage_guides/deepspeed.mdx
index 098f1192b6..40cd670383 100644
--- a/docs/source/usage_guides/deepspeed.mdx
+++ b/docs/source/usage_guides/deepspeed.mdx
@@ -79,7 +79,7 @@ It is strongly advised to read [this section](https://huggingface.co/docs/transf
-Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.21.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Intel.
+Other examples of configurations for HPUs are proposed [here](https://github.com/HabanaAI/Model-References/tree/1.22.0/PyTorch/nlp/DeepSpeedExamples/deepspeed-bert/scripts) by Intel.
The [Transformers documentation](https://huggingface.co/docs/transformers/main_classes/deepspeed#configuration) explains how to write a configuration from scratch very well.
A more complete description of all configuration possibilities is available [here](https://www.deepspeed.ai/docs/config-json/).
diff --git a/examples/audio-classification/README.md b/examples/audio-classification/README.md
index ffc38e6709..143701b087 100644
--- a/examples/audio-classification/README.md
+++ b/examples/audio-classification/README.md
@@ -27,9 +27,6 @@ First, you should install the requirements:
pip install -r requirements.txt
```
-> [!NOTE]
-> Please add the flags ENABLE_LB_BUNDLE_ALL_COMPUTE_MME=0 and ENABLE_EXPERIMENTAL_FLAGS=1 for facebook/wav2vec2-base stability issues on gaudi3. Please note this is a workaround for release 1.20 only.
-
## Single-HPU
The following command shows how to fine-tune [wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the 🗣️ [Keyword Spotting subset](https://huggingface.co/datasets/superb#ks) of the SUPERB dataset on a single HPU.
diff --git a/examples/audio-classification/requirements.txt b/examples/audio-classification/requirements.txt
index bae36f7451..9367c8e899 100644
--- a/examples/audio-classification/requirements.txt
+++ b/examples/audio-classification/requirements.txt
@@ -1,4 +1,5 @@
-datasets>=1.14.0
-evaluate
+datasets == 3.6.0
+evaluate == 0.4.3
numba==0.60.0
-librosa
+librosa == 0.10.2.post1
+
diff --git a/examples/audio-classification/run_audio_classification.py b/examples/audio-classification/run_audio_classification.py
index 073b8ad577..801250e27d 100644
--- a/examples/audio-classification/run_audio_classification.py
+++ b/examples/audio-classification/run_audio_classification.py
@@ -47,7 +47,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt")
diff --git a/examples/contrastive-image-text/requirements.txt b/examples/contrastive-image-text/requirements.txt
index 877a4cc85f..d1fff8c979 100644
--- a/examples/contrastive-image-text/requirements.txt
+++ b/examples/contrastive-image-text/requirements.txt
@@ -1 +1 @@
-datasets>=1.8.0
+datasets >= 1.8.0, <= 2.19.2
diff --git a/examples/contrastive-image-text/run_bridgetower.py b/examples/contrastive-image-text/run_bridgetower.py
index 0b98b7a0b2..67042a6191 100644
--- a/examples/contrastive-image-text/run_bridgetower.py
+++ b/examples/contrastive-image-text/run_bridgetower.py
@@ -59,7 +59,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
@@ -101,7 +101,7 @@ class ModelArguments:
},
)
trust_remote_code: bool = field(
- default=False,
+ default=True,
metadata={
"help": (
"Whether to trust the execution of code from datasets/models defined on the Hub."
diff --git a/examples/contrastive-image-text/run_clip.py b/examples/contrastive-image-text/run_clip.py
index 2e928ec3d6..a35b6cff66 100644
--- a/examples/contrastive-image-text/run_clip.py
+++ b/examples/contrastive-image-text/run_clip.py
@@ -62,7 +62,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/contrastive-image-text/requirements.txt")
@@ -104,7 +104,7 @@ class ModelArguments:
},
)
trust_remote_code: bool = field(
- default=False,
+ default=True,
metadata={
"help": (
"Whether to trust the execution of code from datasets/models defined on the Hub."
diff --git a/examples/image-classification/requirements.txt b/examples/image-classification/requirements.txt
index 4cbf42532d..2336488f93 100644
--- a/examples/image-classification/requirements.txt
+++ b/examples/image-classification/requirements.txt
@@ -1,6 +1,6 @@
-torch>=1.5.0
-torchvision>=0.6.0
-datasets>=2.14.0
-evaluate
+torch >= 1.5.0
+torchvision >= 0.6.0
+datasets >= 2.14.0, <= 2.19.2
+evaluate == 0.4.3
scikit-learn == 1.5.2
timm>=0.9.16
diff --git a/examples/image-classification/run_image_classification.py b/examples/image-classification/run_image_classification.py
index a82428eb94..940bc19377 100644
--- a/examples/image-classification/run_image_classification.py
+++ b/examples/image-classification/run_image_classification.py
@@ -65,7 +65,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
diff --git a/examples/image-to-text/requirements.txt b/examples/image-to-text/requirements.txt
index 4abc5d3998..871e7ad665 100644
--- a/examples/image-to-text/requirements.txt
+++ b/examples/image-to-text/requirements.txt
@@ -3,4 +3,4 @@ Levenshtein
sentencepiece != 0.1.92
tiktoken
blobfile
-datasets
+datasets == 3.6.0
diff --git a/examples/image-to-text/run_image2text_lora_finetune.py b/examples/image-to-text/run_image2text_lora_finetune.py
index 927d58749f..95307f229a 100644
--- a/examples/image-to-text/run_image2text_lora_finetune.py
+++ b/examples/image-to-text/run_image2text_lora_finetune.py
@@ -55,7 +55,7 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
def normalized_levenshtein(s1, s2):
diff --git a/examples/image-to-text/run_pipeline.py b/examples/image-to-text/run_pipeline.py
index 2c5cdbf3e4..6381cc1133 100644
--- a/examples/image-to-text/run_pipeline.py
+++ b/examples/image-to-text/run_pipeline.py
@@ -359,6 +359,7 @@ def main():
model = AutoModelForVision2Seq.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype)
if model_type == "mllama":
model.language_model = initialize_distributed_model(args, model.language_model, logger, model_dtype)
+ model.to("hpu")
else:
model = initialize_distributed_model(args, model, logger, model_dtype)
generator = pipeline(
diff --git a/examples/kubernetes/Dockerfile b/examples/kubernetes/Dockerfile
index 95d29b30bf..77c91c28bf 100644
--- a/examples/kubernetes/Dockerfile
+++ b/examples/kubernetes/Dockerfile
@@ -1,7 +1,7 @@
-ARG GAUDI_SW_VER=1.21.0
+ARG GAUDI_SW_VER=1.22.0
ARG OS=ubuntu22.04
ARG TORCH_VER=2.6.0
-ARG OPTIMUM_HABANA_VER=1.18.0
+ARG OPTIMUM_HABANA_VER=1.19.0
FROM vault.habana.ai/gaudi-docker/${GAUDI_SW_VER}/${OS}/habanalabs/pytorch-installer-${TORCH_VER}:latest AS optimum-habana
diff --git a/examples/kubernetes/README.md b/examples/kubernetes/README.md
index 8332098d18..a6b1ed0333 100644
--- a/examples/kubernetes/README.md
+++ b/examples/kubernetes/README.md
@@ -43,12 +43,12 @@ Use the following commands to build the containers:
```bash
# Specify the Gaudi SW version, OS, and PyTorch version which will be used for the base container
-export GAUDI_SW_VER=1.21.0
+export GAUDI_SW_VER=1.22.0
export OS=ubuntu22.04
export TORCH_VER=2.6.0
# Specify the version of optimum-habana to install in the container
-export OPTIMUM_HABANA_VER=1.18.0
+export OPTIMUM_HABANA_VER=1.19.0
git clone https://github.com/huggingface/optimum-habana.git
diff --git a/examples/kubernetes/README.md.gotmpl b/examples/kubernetes/README.md.gotmpl
index d69e412208..534550e74e 100644
--- a/examples/kubernetes/README.md.gotmpl
+++ b/examples/kubernetes/README.md.gotmpl
@@ -43,12 +43,12 @@ Use the following commands to build the containers:
```bash
# Specify the Gaudi SW version, OS, and PyTorch version which will be used for the base container
-export GAUDI_SW_VER=1.21.0
+export GAUDI_SW_VER=1.22.0
export OS=ubuntu22.04
export TORCH_VER=2.6.0
# Specify the version of optimum-habana to install in the container
-export OPTIMUM_HABANA_VER=1.18.0
+export OPTIMUM_HABANA_VER=1.19.0
git clone https://github.com/huggingface/optimum-habana.git
diff --git a/examples/kubernetes/docker-compose.yaml b/examples/kubernetes/docker-compose.yaml
index 8a6c6c2a6c..75844263cc 100644
--- a/examples/kubernetes/docker-compose.yaml
+++ b/examples/kubernetes/docker-compose.yaml
@@ -5,30 +5,30 @@ services:
http_proxy: ${http_proxy:-""}
https_proxy: ${https_proxy:-""}
no_proxy: ${no_proxy:-""}
- GAUDI_SW_VER: ${GAUDI_SW_VER:-1.21.0}
+ GAUDI_SW_VER: ${GAUDI_SW_VER:-1.22.0}
OS: ${OS:-ubuntu22.04}
- OPTIMUM_HABANA_VER: ${OPTIMUM_HABANA_VER:-1.18.0}
+ OPTIMUM_HABANA_VER: ${OPTIMUM_HABANA_VER:-1.19.0}
TORCH_VER: ${TORCH_VER:-2.6.0}
REGISTRY: ${REGISTRY}
REPO: ${REPO}
context: .
labels:
- org.opencontainers.base.name: "vault.habana.ai/gaudi-docker/${GAUDI_SW_VER:-1.21.0}/${OS:-ubuntu22.04}/habanalabs/pytorch-installer-${TORCH_VER:-2.6.0}:latest"
+ org.opencontainers.base.name: "vault.habana.ai/gaudi-docker/${GAUDI_SW_VER:-1.22.0}/${OS:-ubuntu22.04}/habanalabs/pytorch-installer-${TORCH_VER:-2.6.0}:latest"
org.opencontainers.image.title: "Optimum for Intel® Gaudi® Accelerators"
- org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.18.0}
+ org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.19.0}
command: >
sh -c "python -c 'from optimum import habana; print(\"optimum-habana:\", habana.__version__)'"
- image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.18.0}
+ image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.19.0}
pull_policy: always
optimum-habana-examples:
build:
labels:
- org.opencontainers.base.name: "${REGISTRY}/${REPO}:gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.18.0}"
+ org.opencontainers.base.name: "${REGISTRY}/${REPO}:gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-${OPTIMUM_HABANA_VER:-1.19.0}"
org.opencontainers.image.title: "Optimum for Intel® Gaudi® Accelerators Examples"
- org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.18.0}
+ org.opencontainers.image.version: gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.19.0}
target: optimum-habana-examples
command: >
sh -c "python -c 'from optimum import habana; print(\"optimum-habana:\", habana.__version__)'"
extends: optimum-habana
- image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.21.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.18.0}
+ image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-gaudi-${GAUDI_SW_VER:-1.22.0}-optimum-habana-examples-${OPTIMUM_HABANA_VER:-1.19.0}
diff --git a/examples/language-modeling/requirements.txt b/examples/language-modeling/requirements.txt
index aa223dd7f6..a2558a3836 100644
--- a/examples/language-modeling/requirements.txt
+++ b/examples/language-modeling/requirements.txt
@@ -1,6 +1,6 @@
-datasets >= 2.14.0
+datasets >= 2.14.0, <= 2.19.2
sentencepiece != 0.1.92
-protobuf
-evaluate
+protobuf == 3.20.3
+evaluate == 0.4.3
scikit-learn == 1.5.2
peft == 0.12.0
diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py
index 1c90b93ed3..373861ebf9 100644
--- a/examples/language-modeling/run_clm.py
+++ b/examples/language-modeling/run_clm.py
@@ -63,7 +63,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
diff --git a/examples/language-modeling/run_lora_clm.py b/examples/language-modeling/run_lora_clm.py
index 1d4d328139..b22eabba44 100644
--- a/examples/language-modeling/run_lora_clm.py
+++ b/examples/language-modeling/run_lora_clm.py
@@ -70,7 +70,7 @@ def check_optimum_habana_min_version(*a, **b):
logger = logging.getLogger(__name__)
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
@dataclass
diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py
index 98741f2b4b..3c58cfaa47 100644
--- a/examples/language-modeling/run_mlm.py
+++ b/examples/language-modeling/run_mlm.py
@@ -62,7 +62,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=2.14.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
diff --git a/examples/language-modeling/run_multitask_prompt_tuning.py b/examples/language-modeling/run_multitask_prompt_tuning.py
index 1cd743a874..5ba6a2ca53 100644
--- a/examples/language-modeling/run_multitask_prompt_tuning.py
+++ b/examples/language-modeling/run_multitask_prompt_tuning.py
@@ -61,7 +61,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risk.
check_min_version("4.49.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
diff --git a/examples/language-modeling/run_prompt_tuning_clm.py b/examples/language-modeling/run_prompt_tuning_clm.py
index 1a35196445..bef9984b70 100644
--- a/examples/language-modeling/run_prompt_tuning_clm.py
+++ b/examples/language-modeling/run_prompt_tuning_clm.py
@@ -63,7 +63,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.49.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
diff --git a/examples/multi-node-training/EFA/Dockerfile b/examples/multi-node-training/EFA/Dockerfile
index 5fe9c2386d..76b4a8cce6 100644
--- a/examples/multi-node-training/EFA/Dockerfile
+++ b/examples/multi-node-training/EFA/Dockerfile
@@ -1,4 +1,4 @@
-FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+FROM vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
# Installs pdsh and upgrade pip
RUN apt-get update && apt-get install -y pdsh && \
diff --git a/examples/multi-node-training/GaudiNIC/Dockerfile b/examples/multi-node-training/GaudiNIC/Dockerfile
index e3774c80b1..f7301380f5 100644
--- a/examples/multi-node-training/GaudiNIC/Dockerfile
+++ b/examples/multi-node-training/GaudiNIC/Dockerfile
@@ -1,4 +1,4 @@
-FROM vault.habana.ai/gaudi-docker/1.21.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+FROM vault.habana.ai/gaudi-docker/1.22.0/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
# Installs pdsh and upgrade pip
RUN apt-get update && apt-get install -y pdsh && \
diff --git a/examples/protein-folding/requirements.txt b/examples/protein-folding/requirements.txt
index e3b2a3ba45..3521dca2ad 100644
--- a/examples/protein-folding/requirements.txt
+++ b/examples/protein-folding/requirements.txt
@@ -1,2 +1,2 @@
-datasets>=2.14.0
+datasets == 3.6.0
scikit-learn == 1.5.2
diff --git a/examples/protein-folding/run_esmfold.py b/examples/protein-folding/run_esmfold.py
index 2e2003ab1b..94036a423e 100644
--- a/examples/protein-folding/run_esmfold.py
+++ b/examples/protein-folding/run_esmfold.py
@@ -40,7 +40,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
def convert_outputs_to_pdb(outputs):
diff --git a/examples/protein-folding/run_sequence_classification.py b/examples/protein-folding/run_sequence_classification.py
index 6c69e2f62e..a2aed8bc6a 100644
--- a/examples/protein-folding/run_sequence_classification.py
+++ b/examples/protein-folding/run_sequence_classification.py
@@ -41,7 +41,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
diff --git a/examples/protein-folding/run_zero_shot_eval.py b/examples/protein-folding/run_zero_shot_eval.py
index 83aa1c0ce0..3d3a4edadf 100644
--- a/examples/protein-folding/run_zero_shot_eval.py
+++ b/examples/protein-folding/run_zero_shot_eval.py
@@ -36,7 +36,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
logging.basicConfig(
diff --git a/examples/pytorch-image-models/requirements.txt b/examples/pytorch-image-models/requirements.txt
index c18d628ee5..4ad67f5781 100644
--- a/examples/pytorch-image-models/requirements.txt
+++ b/examples/pytorch-image-models/requirements.txt
@@ -1,2 +1,2 @@
timm
-datasets
+datasets == 3.6.0
diff --git a/examples/question-answering/requirements.txt b/examples/question-answering/requirements.txt
index 09d7e4bc77..450d9a4cfc 100644
--- a/examples/question-answering/requirements.txt
+++ b/examples/question-answering/requirements.txt
@@ -1,3 +1,3 @@
-datasets >= 2.4.0
+datasets == 3.6.0
torch >= 1.3.0
-evaluate
+evaluate == 0.4.3
diff --git a/examples/question-answering/run_qa.py b/examples/question-answering/run_qa.py
index 064717d80f..ba70d543d9 100644
--- a/examples/question-answering/run_qa.py
+++ b/examples/question-answering/run_qa.py
@@ -60,7 +60,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
diff --git a/examples/question-answering/run_seq2seq_qa.py b/examples/question-answering/run_seq2seq_qa.py
index 374ec915ca..bd4ccf19a3 100644
--- a/examples/question-answering/run_seq2seq_qa.py
+++ b/examples/question-answering/run_seq2seq_qa.py
@@ -57,7 +57,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
diff --git a/examples/sentence-transformers-training/nli/requirements.txt b/examples/sentence-transformers-training/nli/requirements.txt
index 680dc8a2bb..1b97e4c3d7 100644
--- a/examples/sentence-transformers-training/nli/requirements.txt
+++ b/examples/sentence-transformers-training/nli/requirements.txt
@@ -1,2 +1,2 @@
-datasets
+datasets <= 2.19.2
peft
diff --git a/examples/sentence-transformers-training/paraphrases/requirements.txt b/examples/sentence-transformers-training/paraphrases/requirements.txt
index aee11b288a..b776a8dd19 100644
--- a/examples/sentence-transformers-training/paraphrases/requirements.txt
+++ b/examples/sentence-transformers-training/paraphrases/requirements.txt
@@ -1 +1 @@
-datasets
+datasets <= 2.19.2
diff --git a/examples/sentence-transformers-training/sts/requirements.txt b/examples/sentence-transformers-training/sts/requirements.txt
index 680dc8a2bb..1b97e4c3d7 100644
--- a/examples/sentence-transformers-training/sts/requirements.txt
+++ b/examples/sentence-transformers-training/sts/requirements.txt
@@ -1,2 +1,2 @@
-datasets
+datasets <= 2.19.2
peft
diff --git a/examples/speech-recognition/README.md b/examples/speech-recognition/README.md
index 64fb1d1ebc..69625cc0ab 100644
--- a/examples/speech-recognition/README.md
+++ b/examples/speech-recognition/README.md
@@ -18,13 +18,19 @@ limitations under the License.
## Table of Contents
-- [Automatic Speech Recognition with CTC](#connectionist-temporal-classification)
- - [Single HPU example](#single-hpu-ctc)
- - [Multi HPU example](#multi-hpu-ctc)
-- [Automatic Speech Recognition with Sequence-to-Sequence](#sequence-to-sequence)
- - [Whisper Model](#whisper-model)
- - [Fine tuning](#single-hpu-whisper-fine-tuning-with-seq2seq)
- - [Inference](#single-hpu-seq2seq-inference)
+- [Automatic Speech Recognition Examples](#automatic-speech-recognition-examples)
+ - [Table of Contents](#table-of-contents)
+ - [Requirements](#requirements)
+ - [Connectionist Temporal Classification](#connectionist-temporal-classification)
+ - [Single-HPU CTC](#single-hpu-ctc)
+ - [Multi-HPU CTC](#multi-hpu-ctc)
+ - [DeepSpeed](#deepspeed)
+ - [Inference](#inference)
+ - [Sequence to Sequence](#sequence-to-sequence)
+ - [Whisper Model](#whisper-model)
+ - [Single HPU Whisper Fine tuning with Seq2Seq](#single-hpu-whisper-fine-tuning-with-seq2seq)
+ - [Multi HPU Whisper Training with Seq2Seq](#multi-hpu-whisper-training-with-seq2seq)
+ - [Single HPU Seq2Seq Inference](#single-hpu-seq2seq-inference)
## Requirements
diff --git a/examples/speech-recognition/requirements.txt b/examples/speech-recognition/requirements.txt
index 67aeeaaa30..f5c8404aa4 100644
--- a/examples/speech-recognition/requirements.txt
+++ b/examples/speech-recognition/requirements.txt
@@ -1,5 +1,5 @@
-datasets >= 1.18.0
+datasets >= 1.18.0, <= 2.19.2
numba==0.60.0
-librosa
-jiwer
-evaluate
+librosa == 0.10.2.post1
+jiwer == 3.0.4
+evaluate == 0.4.3
diff --git a/examples/speech-recognition/run_speech_recognition_ctc.py b/examples/speech-recognition/run_speech_recognition_ctc.py
index 3e40517af3..5afe55a335 100644
--- a/examples/speech-recognition/run_speech_recognition_ctc.py
+++ b/examples/speech-recognition/run_speech_recognition_ctc.py
@@ -59,7 +59,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
@@ -192,6 +192,10 @@ class DataTrainingArguments:
dataset_name: str = field(
metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
)
+ dataset_dir: Optional[str] = field(
+ default=None,
+ metadata={"help": "Optional path to a local dataset directory (e.g. extracted LibriSpeech)."},
+ )
dataset_config_name: str = field(
default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
)
@@ -488,13 +492,18 @@ def main():
# 1. First, let's load the dataset
raw_datasets = DatasetDict()
- raw_datasets["train"] = load_dataset(
- data_args.dataset_name,
- data_args.dataset_config_name,
- split=data_args.train_split_name,
- token=data_args.token,
- trust_remote_code=data_args.trust_remote_code,
- )
+ load_dataset_kwargs = {
+ "path": data_args.dataset_name,
+ "name": data_args.dataset_config_name,
+ "split": data_args.train_split_name,
+ "token": data_args.token,
+ "trust_remote_code": data_args.trust_remote_code,
+ }
+ if data_args.dataset_dir is not None:
+ load_dataset_kwargs["data_dir"] = data_args.dataset_dir
+ logger.info(f"Loading dataset from local cache directory: {data_args.dataset_dir}")
+
+ raw_datasets["train"] = load_dataset(**load_dataset_kwargs)
if data_args.audio_column_name not in raw_datasets["train"].column_names:
raise ValueError(
@@ -514,13 +523,8 @@ def main():
raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples))
if training_args.do_eval:
- raw_datasets["eval"] = load_dataset(
- data_args.dataset_name,
- data_args.dataset_config_name,
- split=data_args.eval_split_name,
- token=data_args.token,
- trust_remote_code=data_args.trust_remote_code,
- )
+ load_dataset_kwargs["split"] = data_args.eval_split_name
+ raw_datasets["eval"] = load_dataset(**load_dataset_kwargs)
if data_args.max_eval_samples is not None:
raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples))
diff --git a/examples/speech-recognition/run_speech_recognition_seq2seq.py b/examples/speech-recognition/run_speech_recognition_seq2seq.py
index f52bd73887..562290413b 100755
--- a/examples/speech-recognition/run_speech_recognition_seq2seq.py
+++ b/examples/speech-recognition/run_speech_recognition_seq2seq.py
@@ -56,7 +56,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")
diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md
index f9753fe246..b4d4e46557 100644
--- a/examples/stable-diffusion/README.md
+++ b/examples/stable-diffusion/README.md
@@ -84,7 +84,7 @@ Stable Diffusion 3 was introduced by Stability AI [here](https://stability.ai/ne
It uses Diffusion Transformer instead of UNet for denoising, which yields improved image quality.
```bash
-PT_HPU_LAZY_MODE=1 \
+PT_HPU_LAZY_MODE=1 PT_HPU_MAX_COMPOUND_OP_SIZE=1 \
python text_to_image_generation.py \
--model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers \
--prompts "Sailing ship painting by Van Gogh" \
@@ -480,4 +480,4 @@ PT_HPU_LAZY_MODE=1 python text_to_video_generation.py \
- **Batch Size Limitation**: Due to a known issue, batch sizes for some Stable Diffusion models need to be reduced.
This issue is expected to be resolved in a future release.
-- **Image-to-Video ControlNet**: The Image-to-Video ControlNet command is currently not supported on Gaudi3.
+- **Image-to-Video ControlNet**: The Image-to-Video ControlNet command is currently not supported on Gaudi3.
\ No newline at end of file
diff --git a/examples/stable-diffusion/depth_to_image_generation.py b/examples/stable-diffusion/depth_to_image_generation.py
index fcd89257d1..0443bc5080 100755
--- a/examples/stable-diffusion/depth_to_image_generation.py
+++ b/examples/stable-diffusion/depth_to_image_generation.py
@@ -41,7 +41,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/image_to_image_generation.py b/examples/stable-diffusion/image_to_image_generation.py
index 9542931b11..f55c01a6f9 100755
--- a/examples/stable-diffusion/image_to_image_generation.py
+++ b/examples/stable-diffusion/image_to_image_generation.py
@@ -41,7 +41,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/image_to_video_generation.py b/examples/stable-diffusion/image_to_video_generation.py
index 3aacdb51a1..c2be57980a 100755
--- a/examples/stable-diffusion/image_to_video_generation.py
+++ b/examples/stable-diffusion/image_to_video_generation.py
@@ -38,7 +38,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json
new file mode 100644
index 0000000000..91a74c633c
--- /dev/null
+++ b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json
@@ -0,0 +1,18871 @@
+{
+ "GlobalRank": null,
+ "LocalRank": null,
+ "Mode": "DynamicRange",
+ "Nodes": {
+ "conv_in": {
+ "inputs": [
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.4765625
+ ]
+ ]
+ }
+ },
+ "time_embedding.linear_1": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.171875
+ ]
+ ]
+ }
+ },
+ "time_embedding.linear_2": {
+ "inputs": [
+ [
+ [
+ 3.671875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1318359375
+ ]
+ ]
+ }
+ },
+ "add_embedding.linear_1": {
+ "inputs": [
+ [
+ [
+ 7.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 1.390625
+ ]
+ ]
+ }
+ },
+ "add_embedding.linear_2": {
+ "inputs": [
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.0.resnets.0.conv1": {
+ "inputs": [
+ [
+ [
+ 9.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.57421875
+ ]
+ ]
+ }
+ },
+ "down_blocks.0.resnets.0.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.57421875
+ ]
+ ]
+ }
+ },
+ "down_blocks.0.resnets.0.conv2": {
+ "inputs": [
+ [
+ [
+ 7.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.486328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.0.resnets.1.conv1": {
+ "inputs": [
+ [
+ [
+ 9.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.65234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.0.resnets.1.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.294921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.0.resnets.1.conv2": {
+ "inputs": [
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.41796875
+ ]
+ ]
+ }
+ },
+ "down_blocks.0.downsamplers.0.conv": {
+ "inputs": [
+ [
+ [
+ 6.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.proj_in": {
+ "inputs": [
+ [
+ [
+ 7.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.177734375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 9.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2265625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 9.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.19921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 9.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2314453125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 9.1875
+ ]
+ ],
+ [
+ [
+ 9.5625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 5.75
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 314.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 11.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1611328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.287109375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1279296875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10302734375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 107.5
+ ]
+ ],
+ [
+ [
+ 5.6875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 20.125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 856.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 7.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2255859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 32.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.3125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 10.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1865234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 10.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1826171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 10.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1826171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.3359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ],
+ [
+ [
+ 9.5
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 6.21875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 376.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.33203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.154296875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 13.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2412109375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 97.0
+ ]
+ ],
+ [
+ [
+ 4.65625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 17.375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2576.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 9.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.240234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 70.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.291015625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.0.proj_out": {
+ "inputs": [
+ [
+ [
+ 20.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1083984375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.proj_in": {
+ "inputs": [
+ [
+ [
+ 11.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1748046875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.193359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1923828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.248046875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.96875
+ ]
+ ],
+ [
+ [
+ 7.90625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 199.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 12.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1337890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1533203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0986328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1884765625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 135.0
+ ]
+ ],
+ [
+ [
+ 4.125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.953125
+ ]
+ ],
+ [
+ [
+ 15.125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1864.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.953125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 19.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 9.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.193359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 9.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.201171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 9.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1591796875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.71875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.35546875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.875
+ ]
+ ],
+ [
+ [
+ 8.75
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 6.03125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 173.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 13.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1201171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09716796875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 8.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1943359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 152.0
+ ]
+ ],
+ [
+ [
+ 4.46875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 19.625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1792.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.201171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 33.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.22265625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.attentions.1.proj_out": {
+ "inputs": [
+ [
+ [
+ 22.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.resnets.0.conv1": {
+ "inputs": [
+ [
+ [
+ 15.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.8203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.resnets.0.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.resnets.0.conv2": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.271484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.resnets.0.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 19.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.251953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.resnets.1.conv1": {
+ "inputs": [
+ [
+ [
+ 7.46875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 1.1953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.resnets.1.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.326171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.resnets.1.conv2": {
+ "inputs": [
+ [
+ [
+ 7.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.322265625
+ ]
+ ]
+ }
+ },
+ "down_blocks.1.downsamplers.0.conv": {
+ "inputs": [
+ [
+ [
+ 27.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.25390625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.proj_in": {
+ "inputs": [
+ [
+ [
+ 9.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.3359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 3.359375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1455078125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 3.359375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 3.359375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.1875
+ ]
+ ],
+ [
+ [
+ 8.1875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 6.8125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 282.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 9.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09521484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2333984375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.119140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 12.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 104.0
+ ]
+ ],
+ [
+ [
+ 7.71875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 21.125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1904.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2158203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 23.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 4.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.134765625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 4.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 4.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1279296875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1689453125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 5.96875
+ ]
+ ],
+ [
+ [
+ 6.28125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 8.6875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 155.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 11.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10400390625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2353515625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.05322265625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 95.0
+ ]
+ ],
+ [
+ [
+ 5.40625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 21.75
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1192.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 2.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 18.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1748046875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1357421875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.142578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1533203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.375
+ ]
+ ],
+ [
+ [
+ 7.65625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 6.625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 194.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09912109375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1630859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12353515625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0439453125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 126.0
+ ]
+ ],
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 20.5
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 980.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2119140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 22.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.53125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1513671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.5625
+ ]
+ ],
+ [
+ [
+ 7.71875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 7.96875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 188.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10888671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.169921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11767578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.53125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.049072265625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 122.0
+ ]
+ ],
+ [
+ [
+ 6.28125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 25.875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2528.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.671875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.193359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 26.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.13671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.123046875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1279296875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.119140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1337890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.0
+ ]
+ ],
+ [
+ [
+ 7.15625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 6.96875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 185.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 16.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0810546875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1357421875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1044921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.796875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.04638671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 110.0
+ ]
+ ],
+ [
+ [
+ 4.5625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 23.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1448.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.53125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 26.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1455078125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.130859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.126953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1259765625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12451171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.65625
+ ]
+ ],
+ [
+ [
+ 7.3125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 5.6875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 172.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 17.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08349609375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09716796875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09228515625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.34375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.040771484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 65.0
+ ]
+ ],
+ [
+ [
+ 4.09375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 26.25
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1104.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.890625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1533203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 22.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1240234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1220703125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1357421875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.21875
+ ]
+ ],
+ [
+ [
+ 6.5625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 5.3125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 149.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 20.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.080078125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06884765625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.049560546875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 1.21875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0260009765625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 43.25
+ ]
+ ],
+ [
+ [
+ 3.640625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ],
+ [
+ [
+ 20.875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 940.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.18359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 22.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11962890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11767578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.71875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 5.9375
+ ]
+ ],
+ [
+ [
+ 5.75
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.984375
+ ]
+ ],
+ [
+ [
+ 5.03125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 145.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.984375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 18.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0751953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08740234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0693359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.296875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.039794921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 42.75
+ ]
+ ],
+ [
+ [
+ 3.734375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.953125
+ ]
+ ],
+ [
+ [
+ 24.125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 988.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.953125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.734375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 28.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.154296875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11865234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11083984375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.169921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 5.8125
+ ]
+ ],
+ [
+ [
+ 6.0625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.984375
+ ]
+ ],
+ [
+ [
+ 5.71875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 139.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.984375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 19.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.068359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.083984375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.059326171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 1.515625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.039794921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 49.75
+ ]
+ ],
+ [
+ [
+ 3.921875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.984375
+ ]
+ ],
+ [
+ [
+ 24.25
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1368.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.984375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1455078125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 24.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1796875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.130859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.126953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1240234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.65625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.0625
+ ]
+ ],
+ [
+ [
+ 6.53125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.984375
+ ]
+ ],
+ [
+ [
+ 5.5625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 129.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.984375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 16.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07275390625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.087890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08984375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.134765625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 63.5
+ ]
+ ],
+ [
+ [
+ 3.71875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 32.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1312.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 30.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.17578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.0.proj_out": {
+ "inputs": [
+ [
+ [
+ 44.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11962890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.proj_in": {
+ "inputs": [
+ [
+ [
+ 8.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.490234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 3.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 3.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.142578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 3.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12255859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2294921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.90625
+ ]
+ ],
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 5.4375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 230.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 12.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10498046875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2060546875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2021484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 131.0
+ ]
+ ],
+ [
+ [
+ 6.9375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 23.375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1608.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.177734375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 34.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.251953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 5.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 5.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 5.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1787109375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.3125
+ ]
+ ],
+ [
+ [
+ 6.4375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.984375
+ ]
+ ],
+ [
+ [
+ 6.15625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 166.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.984375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 16.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2392578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1474609375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07568359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 117.0
+ ]
+ ],
+ [
+ [
+ 7.71875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 19.75
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1528.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.453125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1787109375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 30.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16015625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1728515625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.5625
+ ]
+ ],
+ [
+ [
+ 7.21875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 6.8125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 157.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 17.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.134765625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.224609375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 17.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07568359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 134.0
+ ]
+ ],
+ [
+ [
+ 9.5
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 19.875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1400.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.162109375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 46.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.142578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1337890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.40625
+ ]
+ ],
+ [
+ [
+ 6.53125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ],
+ [
+ [
+ 5.875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 157.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 17.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1806640625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.142578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 13.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07373046875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 142.0
+ ]
+ ],
+ [
+ [
+ 8.0625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 20.125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1624.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.03125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 54.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1640625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.65625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.65625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1318359375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.65625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.34375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1513671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.15625
+ ]
+ ],
+ [
+ [
+ 5.71875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.96875
+ ]
+ ],
+ [
+ [
+ 4.84375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 152.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.96875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 16.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1201171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.212890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.142578125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 18.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.064453125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 139.0
+ ]
+ ],
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 24.875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2304.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1923828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 87.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.166015625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1201171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12451171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1611328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 5.78125
+ ]
+ ],
+ [
+ [
+ 6.03125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 5.5625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 176.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.126953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.21484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.13671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 12.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06396484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 168.0
+ ]
+ ],
+ [
+ [
+ 6.71875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 26.875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1872.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1640625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 92.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.123046875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1201171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.3125
+ ]
+ ],
+ [
+ [
+ 6.3125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 5.3125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 166.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 12.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09033203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.169921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 28.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0576171875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 156.0
+ ]
+ ],
+ [
+ [
+ 4.625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 30.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2096.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1572265625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 111.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.130859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12255859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 5.875
+ ]
+ ],
+ [
+ [
+ 6.65625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9609375
+ ]
+ ],
+ [
+ [
+ 6.6875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 139.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9609375
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 9.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1396484375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.123046875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 20.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06298828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 142.0
+ ]
+ ],
+ [
+ [
+ 5.25
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 34.25
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2368.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1591796875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 107.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.46875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.13671875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.46875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.46875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1416015625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.34375
+ ]
+ ],
+ [
+ [
+ 7.875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.953125
+ ]
+ ],
+ [
+ [
+ 7.40625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 164.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.953125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 8.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0986328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1630859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.126953125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 17.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0595703125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 151.0
+ ]
+ ],
+ [
+ [
+ 5.625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 30.125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 4160.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16796875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 98.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1533203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.130859375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1328125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1240234375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.625
+ ]
+ ],
+ [
+ [
+ 7.46875
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 6.625
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 223.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 7.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08349609375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11962890625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08984375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 16.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.055419921875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 119.0
+ ]
+ ],
+ [
+ [
+ 5.78125
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 39.5
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 3680.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.177734375
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 55.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16015625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.attentions.1.proj_out": {
+ "inputs": [
+ [
+ [
+ 28.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07666015625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.resnets.0.conv1": {
+ "inputs": [
+ [
+ [
+ 8.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.58203125
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.resnets.0.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.65625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.resnets.0.conv2": {
+ "inputs": [
+ [
+ [
+ 7.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.29296875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.resnets.0.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 84.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.265625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.resnets.1.conv1": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.494140625
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.resnets.1.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.66796875
+ ]
+ ]
+ }
+ },
+ "down_blocks.2.resnets.1.conv2": {
+ "inputs": [
+ [
+ [
+ 9.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.3984375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.proj_in": {
+ "inputs": [
+ [
+ [
+ 9.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.341796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 4.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 4.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1572265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 4.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.34375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2001953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ],
+ [
+ [
+ 9.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 6.9375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 372.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 11.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09521484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1708984375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.052734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 85.0
+ ]
+ ],
+ [
+ [
+ 4.75
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 33.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 732.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.015625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.173828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 31.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.392578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1728515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1552734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1806640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.25
+ ]
+ ],
+ [
+ [
+ 10.875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 6.0625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 312.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 13.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1708984375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.13671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.05908203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 88.0
+ ]
+ ],
+ [
+ [
+ 6.84375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 20.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 864.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 22.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.21875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.21875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1650390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.21875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.228515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.28125
+ ]
+ ],
+ [
+ [
+ 8.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 6.84375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 236.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 16.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1396484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.29296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.51953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.53125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 108.0
+ ]
+ ],
+ [
+ [
+ 7.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 20.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 840.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.640625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.193359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 34.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2392578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1552734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1611328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.220703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.375
+ ]
+ ],
+ [
+ [
+ 9.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 231.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 18.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1533203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 127.0
+ ]
+ ],
+ [
+ [
+ 6.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 18.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 924.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1728515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 44.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.197265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1591796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1513671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.318359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.71875
+ ]
+ ],
+ [
+ [
+ 7.78125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 7.15625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 190.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 19.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12060546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1689453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.21875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.072265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 106.5
+ ]
+ ],
+ [
+ [
+ 4.96875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 20.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 888.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.34375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1845703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 54.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2275390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.154296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.294921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.625
+ ]
+ ],
+ [
+ [
+ 7.59375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 7.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 204.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 20.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1162109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.17578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1279296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06787109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 96.5
+ ]
+ ],
+ [
+ [
+ 4.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 22.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1168.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 53.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.197265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1396484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.353515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.0
+ ]
+ ],
+ [
+ [
+ 7.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 7.40625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 198.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 20.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11572265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.177734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.123046875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.703125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.061767578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 98.0
+ ]
+ ],
+ [
+ [
+ 3.96875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 25.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2144.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.71875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1689453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 56.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.216796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 9.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1572265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 9.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1630859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 9.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.30859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.25
+ ]
+ ],
+ [
+ [
+ 7.5625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 6.65625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 171.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 20.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1220703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 13.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.052734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 93.5
+ ]
+ ],
+ [
+ [
+ 4.46875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 30.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1784.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.236328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 51.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.337890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 9.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1416015625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 9.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 9.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.134765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.3359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.59375
+ ]
+ ],
+ [
+ [
+ 7.15625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 8.8125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 143.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 19.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0947265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.71875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09130859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 138.0
+ ]
+ ],
+ [
+ [
+ 5.46875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 39.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 5216.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.29296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 50.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.33984375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 10.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16015625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 10.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 10.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.302734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.71875
+ ]
+ ],
+ [
+ [
+ 7.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 7.1875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 151.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 23.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11181640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0810546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.080078125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 91.0
+ ]
+ ],
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 31.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 3392.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2021484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 46.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.474609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.0.proj_out": {
+ "inputs": [
+ [
+ [
+ 162.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.proj_in": {
+ "inputs": [
+ [
+ [
+ 8.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.28515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 5.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1533203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 5.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1611328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 5.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.162109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.31640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.125
+ ]
+ ],
+ [
+ [
+ 9.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 6.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 228.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 12.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09521484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 2.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 79.5
+ ]
+ ],
+ [
+ [
+ 4.59375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 20.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 648.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.220703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 22.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.259765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1572265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1455078125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.71875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1845703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.5625
+ ]
+ ],
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9765625
+ ]
+ ],
+ [
+ [
+ 6.75
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 241.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9765625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 13.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1591796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1982421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06884765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 65.0
+ ]
+ ],
+ [
+ [
+ 6.75
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 25.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 684.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 7.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 23.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.248046875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.154296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.185546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.03125
+ ]
+ ],
+ [
+ [
+ 7.75
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ],
+ [
+ [
+ 7.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 184.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 15.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1630859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.181640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.53125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 126.0
+ ]
+ ],
+ [
+ [
+ 6.34375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 24.375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 904.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.71875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2236328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 45.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.20703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1669921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1474609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2216796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.9375
+ ]
+ ],
+ [
+ [
+ 9.0625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 7.3125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 182.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 15.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.201171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1689453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06787109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 154.0
+ ]
+ ],
+ [
+ [
+ 6.65625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 18.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1360.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.275390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 86.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2431640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1572265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2353515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.3125
+ ]
+ ],
+ [
+ [
+ 8.375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 7.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 173.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 17.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1259765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.193359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06396484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 118.5
+ ]
+ ],
+ [
+ [
+ 5.40625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 19.375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1072.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2099609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 39.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1982421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1455078125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1474609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2490234375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.3125
+ ]
+ ],
+ [
+ [
+ 7.3125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9765625
+ ]
+ ],
+ [
+ [
+ 7.65625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 194.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9765625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 17.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1669921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0732421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 117.5
+ ]
+ ],
+ [
+ [
+ 4.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 23.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1816.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.26171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 43.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1923828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 10.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.162109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 10.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.150390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 10.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.345703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.09375
+ ]
+ ],
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 204.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 17.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1220703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1337890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.142578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0634765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 199.0
+ ]
+ ],
+ [
+ [
+ 5.40625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 22.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2320.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.71875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.19140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 53.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.17578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 9.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 9.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.162109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 9.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.267578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.875
+ ]
+ ],
+ [
+ [
+ 8.1875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 211.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 16.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1005859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1279296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.059326171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 144.0
+ ]
+ ],
+ [
+ [
+ 5.28125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 25.75
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2640.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2275390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 56.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.19140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 10.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 10.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1611328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 10.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1611328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2099609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.9375
+ ]
+ ],
+ [
+ [
+ 7.875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 9.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 201.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 16.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09423828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11572265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.03125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11474609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 123.5
+ ]
+ ],
+ [
+ [
+ 4.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 28.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2944.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 6.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.228515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 55.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.27734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 11.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 11.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1572265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 11.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.25
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.625
+ ]
+ ],
+ [
+ [
+ 7.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 9.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 136.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 17.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1220703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.103515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.076171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 97.5
+ ]
+ ],
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 34.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2336.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 43.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.37890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.1.proj_out": {
+ "inputs": [
+ [
+ [
+ 120.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11865234375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.proj_in": {
+ "inputs": [
+ [
+ [
+ 12.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.251953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 4.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1630859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 4.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1982421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 4.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.130859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.5390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.59375
+ ]
+ ],
+ [
+ [
+ 8.875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 5.0625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 208.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 10.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10009765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.341796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.259765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 2.421875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2392578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 116.5
+ ]
+ ],
+ [
+ [
+ 4.75
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 31.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1080.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.19140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 16.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.365234375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 4.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 4.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 4.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.126953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.26171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.34375
+ ]
+ ],
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 5.90625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 227.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 10.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1689453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.55078125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 2.71875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07373046875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 80.5
+ ]
+ ],
+ [
+ [
+ 6.34375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 42.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1056.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.185546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 23.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.244140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 5.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1611328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 5.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1591796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 5.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.263671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.84375
+ ]
+ ],
+ [
+ [
+ 8.9375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 6.78125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 227.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 11.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.5
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.412109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.703125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08056640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 98.0
+ ]
+ ],
+ [
+ [
+ 4.9375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 49.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 676.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.173828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 35.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2392578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 5.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1650390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 5.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 5.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.34765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.625
+ ]
+ ],
+ [
+ [
+ 8.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 5.90625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 189.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 12.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1513671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.4921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.203125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06494140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 105.5
+ ]
+ ],
+ [
+ [
+ 4.75
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 19.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 924.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1982421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 37.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.162109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.169921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.494140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.09375
+ ]
+ ],
+ [
+ [
+ 7.59375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 6.1875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 163.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 13.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10791015625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1728515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.18359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.109375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07958984375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 76.0
+ ]
+ ],
+ [
+ [
+ 4.71875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 17.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 648.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.19921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 52.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.28125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1650390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.337890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.71875
+ ]
+ ],
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 5.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 187.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0947265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.134765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10205078125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.234375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.05810546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 79.0
+ ]
+ ],
+ [
+ [
+ 4.21875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 25.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 828.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1826171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 28.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.19140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1826171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.18359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1376953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.30859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.3125
+ ]
+ ],
+ [
+ [
+ 6.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 5.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 154.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0810546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.11865234375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08154296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 2.453125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.04638671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 60.25
+ ]
+ ],
+ [
+ [
+ 4.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 34.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1480.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.84375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.18359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 50.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.220703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.19140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1513671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.03125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.32421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.09375
+ ]
+ ],
+ [
+ [
+ 8.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 9.0625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 177.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0869140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1318359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08642578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 2.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0419921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 61.75
+ ]
+ ],
+ [
+ [
+ 3.59375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 20.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1536.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1669921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 43.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.38671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1806640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.208984375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1337890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.65625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2119140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.90625
+ ]
+ ],
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ],
+ [
+ [
+ 10.6875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 211.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98828125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 17.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08447265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0751953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.043701171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 70.5
+ ]
+ ],
+ [
+ [
+ 3.8125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.984375
+ ]
+ ],
+ [
+ [
+ 38.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1952.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.984375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1669921875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 42.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.298828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1640625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.251953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2470703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.96875
+ ]
+ ],
+ [
+ [
+ 7.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 5.78125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 188.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 19.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1787109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.134765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07763671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 80.0
+ ]
+ ],
+ [
+ [
+ 4.28125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 46.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 3088.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 5.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1845703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 43.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.5625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.attentions.2.proj_out": {
+ "inputs": [
+ [
+ [
+ 174.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.0.conv1": {
+ "inputs": [
+ [
+ [
+ 10.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.435546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.0.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.62109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.0.conv2": {
+ "inputs": [
+ [
+ [
+ 13.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.384765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.0.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 59.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09228515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.1.conv1": {
+ "inputs": [
+ [
+ [
+ 11.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.53515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.1.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.76171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.1.conv2": {
+ "inputs": [
+ [
+ [
+ 11.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.50390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.1.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 94.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.162109375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.2.conv1": {
+ "inputs": [
+ [
+ [
+ 9.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.91015625
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.2.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.82421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.2.conv2": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.26953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.resnets.2.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 94.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1396484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.0.upsamplers.0.conv": {
+ "inputs": [
+ [
+ [
+ 67.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.400390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.proj_in": {
+ "inputs": [
+ [
+ [
+ 6.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.26171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.20703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.197265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1923828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.34765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 9.125
+ ]
+ ],
+ [
+ [
+ 10.125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ],
+ [
+ [
+ 7.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 268.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1376953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2236328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0751953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1650390625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 91.5
+ ]
+ ],
+ [
+ [
+ 4.1875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 22.875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 836.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 7.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1982421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 29.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.380859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 12.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.201171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 12.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 12.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.380859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 9.875
+ ]
+ ],
+ [
+ [
+ 11.5625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 7.25
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 412.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 16.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1357421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.244140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10498046875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 17.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.314453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 159.0
+ ]
+ ],
+ [
+ [
+ 4.59375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 27.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1336.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 9.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1962890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 33.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.201171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.0.proj_out": {
+ "inputs": [
+ [
+ [
+ 34.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.23828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.proj_in": {
+ "inputs": [
+ [
+ [
+ 7.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.205078125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.23046875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.228515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.173828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 9.125
+ ]
+ ],
+ [
+ [
+ 9.3125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 5.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 272.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 13.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1357421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1982421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0751953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 9.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 108.0
+ ]
+ ],
+ [
+ [
+ 4.9375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 21.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 984.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 7.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.251953125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 23.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 9.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2099609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 9.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2138671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 9.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2099609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.28125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.326171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 9.5
+ ]
+ ],
+ [
+ [
+ 10.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 6.625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 508.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 13.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.22265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12451171875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 10.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.291015625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 141.0
+ ]
+ ],
+ [
+ [
+ 4.65625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 22.875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1376.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 7.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.248046875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 68.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2421875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.1.proj_out": {
+ "inputs": [
+ [
+ [
+ 21.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.proj_in": {
+ "inputs": [
+ [
+ [
+ 9.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2255859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 10.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.23046875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 10.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2314453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 10.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1630859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.298828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.875
+ ]
+ ],
+ [
+ [
+ 9.5625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 6.90625
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 243.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1318359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.255859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12353515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 14.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.203125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 170.0
+ ]
+ ],
+ [
+ [
+ 3.4375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 24.375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 968.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 8.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.396484375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 15.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.318359375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 9.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2236328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 9.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.263671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 9.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1748046875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.427734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.125
+ ]
+ ],
+ [
+ [
+ 10.375
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ],
+ [
+ [
+ 6.53125
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 314.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.98046875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 14.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.29296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1591796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 16.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.298828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 117.0
+ ]
+ ],
+ [
+ [
+ 3.671875
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 37.5
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1448.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 7.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.23828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 29.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.30859375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.attentions.2.proj_out": {
+ "inputs": [
+ [
+ [
+ 22.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1591796875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.0.conv1": {
+ "inputs": [
+ [
+ [
+ 12.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 1.0078125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.0.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1552734375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.0.conv2": {
+ "inputs": [
+ [
+ [
+ 11.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.59375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.0.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 146.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2236328125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.1.conv1": {
+ "inputs": [
+ [
+ [
+ 7.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 1.015625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.1.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.470703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.1.conv2": {
+ "inputs": [
+ [
+ [
+ 7.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.5546875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.1.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 42.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.24609375
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.2.conv1": {
+ "inputs": [
+ [
+ [
+ 8.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.64453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.2.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.7578125
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.2.conv2": {
+ "inputs": [
+ [
+ [
+ 8.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.263671875
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.resnets.2.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 31.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1259765625
+ ]
+ ]
+ }
+ },
+ "up_blocks.1.upsamplers.0.conv": {
+ "inputs": [
+ [
+ [
+ 20.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1923828125
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.0.conv1": {
+ "inputs": [
+ [
+ [
+ 9.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.87890625
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.0.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.296875
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.0.conv2": {
+ "inputs": [
+ [
+ [
+ 12.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.90625
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.0.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 54.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.3515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.1.conv1": {
+ "inputs": [
+ [
+ [
+ 9.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.1.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1572265625
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.1.conv2": {
+ "inputs": [
+ [
+ [
+ 12.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.66015625
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.1.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 25.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.2314453125
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.2.conv1": {
+ "inputs": [
+ [
+ [
+ 21.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.78515625
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.2.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1845703125
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.2.conv2": {
+ "inputs": [
+ [
+ [
+ 9.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.9140625
+ ]
+ ]
+ }
+ },
+ "up_blocks.2.resnets.2.conv_shortcut": {
+ "inputs": [
+ [
+ [
+ 13.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.25
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.proj_in": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.298828125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 3.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.140625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 3.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 3.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1328125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1923828125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.0625
+ ]
+ ],
+ [
+ [
+ 8.375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 6.4375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 274.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 11.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07470703125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12109375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1064453125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 3.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.046875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 83.0
+ ]
+ ],
+ [
+ [
+ 4.59375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 22.625
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1216.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 2.640625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1806640625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.0.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 20.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1630859375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.53125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.53125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1533203125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.53125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1494140625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ],
+ [
+ [
+ 9.6875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 7.59375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 215.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 12.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07958984375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1259765625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1064453125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.78125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.042236328125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 83.5
+ ]
+ ],
+ [
+ [
+ 5.21875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 23.75
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1120.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.484375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16015625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.1.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 22.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.142578125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.140625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1767578125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.09375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.90625
+ ]
+ ],
+ [
+ [
+ 9.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.984375
+ ]
+ ],
+ [
+ [
+ 7.03125
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 233.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.984375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 13.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.083984375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1220703125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10546875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 5.59375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0439453125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 72.0
+ ]
+ ],
+ [
+ [
+ 4.03125
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 24.75
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 864.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.546875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1611328125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.2.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 28.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1376953125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1513671875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.9375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1337890625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.4375
+ ]
+ ],
+ [
+ [
+ 8.875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 7.75
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 219.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 12.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06298828125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.10791015625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07958984375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 13.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.03515625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 62.0
+ ]
+ ],
+ [
+ [
+ 3.921875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 24.25
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1184.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.34375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.154296875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.3.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 29.875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 8.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1416015625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 8.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1455078125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 8.1875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.15234375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.4375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1708984375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 8.375
+ ]
+ ],
+ [
+ [
+ 9.125
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 212.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 11.8125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0576171875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.09619140625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0693359375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 2.359375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0322265625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 60.5
+ ]
+ ],
+ [
+ [
+ 3.640625
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 32.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1080.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 3.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.158203125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.4.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 37.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 7.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.126953125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 7.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1484375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 7.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.138671875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.150390625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 6.96875
+ ]
+ ],
+ [
+ [
+ 8.4375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 7.375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 189.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 9.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0556640625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08642578125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.06591796875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 2.546875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.033935546875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 47.5
+ ]
+ ],
+ [
+ [
+ 3.46875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 27.25
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1072.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.34375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.16796875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.5.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 34.25
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1552734375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1357421875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1318359375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.123046875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1240234375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.0
+ ]
+ ],
+ [
+ [
+ 8.125
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 7.03125
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 175.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 8.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.059814453125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08349609375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.055908203125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 6.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.030517578125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 61.75
+ ]
+ ],
+ [
+ [
+ 3.90625
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.984375
+ ]
+ ],
+ [
+ [
+ 31.5
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1528.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.984375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.46875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.6.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 44.5
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1396484375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 6.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.12890625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 6.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.126953125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 6.15625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1416015625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 7.21875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.03125
+ ]
+ ],
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 8.5
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 153.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 6.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.054443359375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.07958984375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.05029296875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 10.375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.031982421875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 57.25
+ ]
+ ],
+ [
+ [
+ 3.40625
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 30.25
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 1080.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1455078125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.7.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 42.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 5.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.134765625
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 5.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1318359375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 5.625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 8.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1484375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.84375
+ ]
+ ],
+ [
+ [
+ 8.625
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ],
+ [
+ [
+ 11.375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 179.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.99609375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 5.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.05517578125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0830078125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.052734375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 1.7109375
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.0296630859375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 61.0
+ ]
+ ],
+ [
+ [
+ 3.59375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ],
+ [
+ [
+ 26.75
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2016.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.9921875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.17578125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.8.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 56.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1435546875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn1.to_q": {
+ "inputs": [
+ [
+ [
+ 4.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1298828125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn1.to_k": {
+ "inputs": [
+ [
+ [
+ 4.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1240234375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn1.to_v": {
+ "inputs": [
+ [
+ [
+ 4.96875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1396484375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0": {
+ "inputs": [
+ [
+ [
+ 4.90625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.14453125
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 7.15625
+ ]
+ ],
+ [
+ [
+ 7.71875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 1.0
+ ]
+ ],
+ [
+ [
+ 8.25
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 153.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 1.0
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn2.to_q": {
+ "inputs": [
+ [
+ [
+ 5.0625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.046630859375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn2.to_k": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.08740234375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn2.to_v": {
+ "inputs": [
+ [
+ [
+ 852.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.04248046875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0": {
+ "inputs": [
+ [
+ [
+ 1.0078125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.026123046875
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": {
+ "inputs": [
+ [
+ [
+ 63.0
+ ]
+ ],
+ [
+ [
+ 3.671875
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": {
+ "inputs": [
+ [
+ [
+ 0.90234375
+ ]
+ ],
+ [
+ [
+ 28.625
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": {
+ "inputs": [
+ [
+ [
+ 2224.0
+ ]
+ ]
+ ],
+ "outputs": [
+ [
+ [
+ 0.90234375
+ ]
+ ]
+ ]
+ },
+ "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj": {
+ "inputs": [
+ [
+ [
+ 4.40625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1552734375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.transformer_blocks.9.ff.net.2": {
+ "inputs": [
+ [
+ [
+ 51.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.146484375
+ ]
+ ]
+ }
+ },
+ "mid_block.attentions.0.proj_out": {
+ "inputs": [
+ [
+ [
+ 26.0
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.1396484375
+ ]
+ ]
+ }
+ },
+ "mid_block.resnets.0.conv1": {
+ "inputs": [
+ [
+ [
+ 9.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.41015625
+ ]
+ ]
+ }
+ },
+ "mid_block.resnets.0.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.53515625
+ ]
+ ]
+ }
+ },
+ "mid_block.resnets.0.conv2": {
+ "inputs": [
+ [
+ [
+ 11.125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.71484375
+ ]
+ ]
+ }
+ },
+ "mid_block.resnets.1.conv1": {
+ "inputs": [
+ [
+ [
+ 8.5625
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.482421875
+ ]
+ ]
+ }
+ },
+ "mid_block.resnets.1.time_emb_proj": {
+ "inputs": [
+ [
+ [
+ 7.6875
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.640625
+ ]
+ ]
+ }
+ },
+ "mid_block.resnets.1.conv2": {
+ "inputs": [
+ [
+ [
+ 13.3125
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.61328125
+ ]
+ ]
+ }
+ },
+ "conv_out": {
+ "inputs": [
+ [
+ [
+ 11.75
+ ]
+ ]
+ ],
+ "params": {
+ "weight": [
+ [
+ 0.21875
+ ]
+ ]
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz
new file mode 100644
index 0000000000..2e6ad5c196
Binary files /dev/null and b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz differ
diff --git a/examples/stable-diffusion/quantization/measure_config.json b/examples/stable-diffusion/quantization/measure_config.json
new file mode 100755
index 0000000000..04576eeb46
--- /dev/null
+++ b/examples/stable-diffusion/quantization/measure_config.json
@@ -0,0 +1,6 @@
+{
+ "method": "HOOKS",
+ "mode": "MEASURE",
+ "observer": "maxabs",
+ "dump_stats_path": "./quantization/measure/fp8"
+}
diff --git a/examples/stable-diffusion/quantization/quant_config.json b/examples/stable-diffusion/quantization/quant_config.json
new file mode 100755
index 0000000000..b372905d7f
--- /dev/null
+++ b/examples/stable-diffusion/quantization/quant_config.json
@@ -0,0 +1,7 @@
+{
+ "method": "HOOKS",
+ "mode": "QUANTIZE",
+ "observer": "maxabs",
+ "scale_method": "maxabs_hw",
+ "dump_stats_path": "./quantization/measure/fp8"
+}
\ No newline at end of file
diff --git a/examples/stable-diffusion/requirements.txt b/examples/stable-diffusion/requirements.txt
index ed24d8c1b7..a8e8750e3f 100644
--- a/examples/stable-diffusion/requirements.txt
+++ b/examples/stable-diffusion/requirements.txt
@@ -1,3 +1,3 @@
-opencv-python
+opencv-python == 4.10.0.84
compel
sentencepiece
diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py
index e3b0beed48..aac565dcd5 100755
--- a/examples/stable-diffusion/text_to_image_generation.py
+++ b/examples/stable-diffusion/text_to_image_generation.py
@@ -42,7 +42,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/text_to_video_generation.py b/examples/stable-diffusion/text_to_video_generation.py
index 144727cbc1..5ab6bf8697 100755
--- a/examples/stable-diffusion/text_to_video_generation.py
+++ b/examples/stable-diffusion/text_to_video_generation.py
@@ -37,7 +37,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
logger = logging.getLogger(__name__)
diff --git a/examples/stable-diffusion/training/requirements.txt b/examples/stable-diffusion/training/requirements.txt
index 7f7e4a2d0e..9a419fde22 100644
--- a/examples/stable-diffusion/training/requirements.txt
+++ b/examples/stable-diffusion/training/requirements.txt
@@ -1,6 +1,6 @@
compel
-datasets
-imagesize
+datasets == 3.6.0
+imagesize == 1.4.1
opencv-python
peft==0.16.0
sentencepiece
diff --git a/examples/stable-diffusion/training/train_controlnet.py b/examples/stable-diffusion/training/train_controlnet.py
index d6c1a391e9..cb199802bf 100755
--- a/examples/stable-diffusion/training/train_controlnet.py
+++ b/examples/stable-diffusion/training/train_controlnet.py
@@ -67,7 +67,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks.
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
if is_wandb_available():
import wandb
diff --git a/examples/stable-diffusion/unconditional_image_generation.py b/examples/stable-diffusion/unconditional_image_generation.py
index 979f60b838..174ea398b4 100755
--- a/examples/stable-diffusion/unconditional_image_generation.py
+++ b/examples/stable-diffusion/unconditional_image_generation.py
@@ -20,7 +20,7 @@ def check_optimum_habana_min_version(*a, **b):
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
# Setup logging
logging.basicConfig(
diff --git a/examples/summarization/requirements.txt b/examples/summarization/requirements.txt
index 7f9dc2a9c4..8cbb65a9b4 100644
--- a/examples/summarization/requirements.txt
+++ b/examples/summarization/requirements.txt
@@ -1,8 +1,8 @@
-datasets >= 2.4.0
+datasets >= 2.4.0, <= 2.19.2
sentencepiece != 0.1.92
-protobuf
-rouge-score
-nltk
-py7zr
+protobuf == 3.20.3
+rouge-score == 0.1.2
+nltk == 3.8.1
+py7zr == 0.21.0
torch >= 1.3
-evaluate
+evaluate == 0.4.3
diff --git a/examples/summarization/run_summarization.py b/examples/summarization/run_summarization.py
index 87e5faa9d1..58aab7db09 100755
--- a/examples/summarization/run_summarization.py
+++ b/examples/summarization/run_summarization.py
@@ -65,7 +65,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
diff --git a/examples/text-classification/requirements.txt b/examples/text-classification/requirements.txt
index 7ce7d0ba42..4890b36c21 100644
--- a/examples/text-classification/requirements.txt
+++ b/examples/text-classification/requirements.txt
@@ -1,7 +1,8 @@
-datasets >= 2.4.0
+datasets == 3.6.0
sentencepiece != 0.1.92
-scipy
+scipy == 1.13.1
scikit-learn == 1.5.2
-protobuf
+protobuf == 5.29.4
+tensorboard == 2.19.0
torch >= 1.3
-evaluate
+evaluate == 0.4.3
diff --git a/examples/text-classification/run_glue.py b/examples/text-classification/run_glue.py
index be36e601ad..dc641838c5 100755
--- a/examples/text-classification/run_glue.py
+++ b/examples/text-classification/run_glue.py
@@ -58,7 +58,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md
index 73e9ad0e35..8f60425907 100755
--- a/examples/text-generation/README.md
+++ b/examples/text-generation/README.md
@@ -816,13 +816,7 @@ pip install -r requirements_lm_eval.txt
```
> [!NOTE]
-> Please add the flags for following models to improve accuracy when using lm_eval on gaudi2. Please note this is a workaround for 1.20 release only.
->
-> ENABLE_LB_BUNDLE_ALL_COMPUTE_MME=0 COMPLEXGUID_DISABLE_RMS_NORM=true ENABLE_EXPERIMENTAL_FLAGS=true for llama-2-70b-hf[PTQ fp8]
->
-> COMPLEXGUID_DISABLE_RMS_NORM=true ENABLE_EXPERIMENTAL_FLAGS=true for Llama-3.1-70B-Instruct[PTQ fp8] and llama-2-70b-hf[bf16]
->
-> If custom models on hub is being used, please set env variable HF_DATASETS_TRUST_REMOTE_CODE=true instead of arg --trust_remote_code with the installed lm_eval version and dependency datasets==3.6.0
+> If custom models from the Hub are being used, please set the environment variable HF_DATASETS_TRUST_REMOTE_CODE=true instead of passing the --trust_remote_code argument; this is required with the installed lm_eval version and its datasets==3.6.0 dependency
### Examples
diff --git a/examples/text-generation/model_adapter.py b/examples/text-generation/model_adapter.py
index e8653a2431..d492aa6505 100644
--- a/examples/text-generation/model_adapter.py
+++ b/examples/text-generation/model_adapter.py
@@ -18,15 +18,23 @@
###############################################################################
import argparse
-from typing import Literal, Optional
+import logging
+from typing import List, Literal, Optional, Union
import torch
import torch.nn.functional as F
+from lm_eval.api.instance import Instance
from lm_eval.models.huggingface import HFLM, TemplateLM
+from lm_eval.models.utils import get_dtype, stop_sequences_criteria
+
+# Hugging Face Transformers imports
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
+logger = logging.getLogger(__name__)
+
+
class HabanaModelAdapter(HFLM):
def __init__(
self,
@@ -35,10 +43,18 @@ def __init__(
args: argparse.Namespace,
options: GenerationConfig,
backend: Literal["default", "causal", "seq2seq"] = "default",
+ truncation: Optional[bool] = False,
logits_cache: bool = True,
+ max_length: Optional[int] = None,
+ softmax_dtype: Union[str, torch.dtype, None] = None,
add_bos_token: Optional[bool] = True,
prefix_token_id: Optional[int] = None,
delta: Optional[str] = None,
+ # end token for thinking, either the string or int token id.
+ # splits to get response after this token (if provided).
+ think_end_token: Optional[Union[str, int]] = None,
+ enable_thinking: Optional[bool] = None,
+ chat_template_args: Optional[dict] = None,
**kwargs,
) -> None:
# To skip cuda code of the HFLM init
@@ -54,11 +70,32 @@ def __init__(
self.peft = args.peft_model
self.delta = delta
self.custom_prefix_token_id = prefix_token_id
+ if isinstance(think_end_token, str) and think_end_token.isdigit():
+ self.think_end_token = int(think_end_token)
+ else:
+ self.think_end_token = think_end_token
+
+ self.chat_template_args = chat_template_args or {}
+ if enable_thinking is not None:
+ self.chat_template_args.update({"enable_thinking": enable_thinking})
+
# determine which of 'causal' and 'seq2seq' backends to use for HF models
self._get_backend(config=self._config, backend=backend, trust_remote_code=args.trust_remote_code)
+ self.truncation = truncation
self.logits_cache = logits_cache
self.add_bos_token = add_bos_token
- self._max_length = options.max_length
+ self._max_length = max_length
+ self.softmax_dtype = get_dtype(softmax_dtype) if softmax_dtype is not None else None
+ self.hpu_graphs = args.use_hpu_graphs
+ self.use_lazy_mode = True
+ if args.torch_compile:
+ self.use_lazy_mode = False
+ self.vocab_size = self._model.config.vocab_size
+ if "gemma" in getattr(self._config, "model_type", ""):
+ self.add_bos_token = True
+ logger.info(
+ f"Model type is '{self._config.model_type}', part of the Gemma family--a BOS token will be used as Gemma underperforms without it."
+ )
self.batch_size_per_gpu = int(args.batch_size)
self.revision = args.model_revision
self.model_inputs = {"use_cache": self.options.use_cache}
@@ -119,7 +156,8 @@ def eot_token_id(self) -> int:
@property
def max_length(self) -> int:
- return self.buckets[-1]
+ # Legacy
+ return self._max_length if self._max_length else self.buckets[-1]
@property
def device(self):
@@ -127,8 +165,18 @@ def device(self):
# Returning 'cpu' to keep tensors on CPU in lm_eval code
return "cpu"
- def find_bucket(self, length: int) -> list[int]:
- return [b for b in self.buckets if b >= length][0]
+ @max_length.setter
+ def max_length(self, value: int) -> None:
+ self._max_length = value
+
+ def find_bucket(self, length: int, key=lambda b, length: b >= length) -> int:
+ for b in self.buckets:
+ if key(b, length):
+ return b
+ new_bucket = length
+ self.buckets.append(new_bucket)
+ self.buckets.sort()
+ return new_bucket
def _model_call(self, inps: torch.Tensor) -> torch.Tensor:
bs, seq_length = inps.shape
@@ -144,8 +192,60 @@ def _model_call(self, inps: torch.Tensor) -> torch.Tensor:
if self.options.static_shapes and padding_length > 0:
logits = logits[:, :-padding_length, :]
logits = logits.to(torch.float32)
+
return logits
+ def generate_until(self, requests: List[Instance], disable_tqdm: bool = False) -> List[str]:
+ """
+ Override to change only max_length property
+ """
+ legacy_max_length = self.max_length
+ self.max_length = super().max_length
+ # Call the parent class's implementation for the unchanged parts
+ res = super().generate_until(requests, disable_tqdm)
+ self.max_length = legacy_max_length
+ return res
+
+ def _model_generate(self, context, max_length, stop, **generation_kwargs):
+ """
+ Patched method
+ source: https://github.com/EleutherAI/lm-evaluation-harness/blob/v0.4.7/lm_eval/models/huggingface.py/#L858
+ """
+
+ # temperature = 0.0 if not set
+ # if do_sample is false and temp==0.0:
+ # remove temperature, as do_sample=False takes care of this
+ # and we don't want a warning from HF
+ generation_kwargs["temperature"] = generation_kwargs.get("temperature", 0.0)
+ do_sample = generation_kwargs.get("do_sample", None)
+
+ # The temperature has to be a strictly positive float -- if it is 0.0, use greedy decoding strategies
+ if generation_kwargs.get("temperature") == 0.0 and do_sample is None:
+ generation_kwargs["do_sample"] = do_sample = False
+
+ if do_sample is False and generation_kwargs.get("temperature") == 0.0:
+ generation_kwargs.pop("temperature")
+ # build stopping criteria
+ stopping_criteria = stop_sequences_criteria(self.tokenizer, stop, context.shape[1], context.shape[0])
+ # to avoid graph recompilation
+ if self.options.static_shapes:
+ self.options.bucket_internal = True
+ _ = self.find_bucket(context.shape[1])
+ max_gen_toks = max_length - context.shape[1]
+ # move context & attention_mask to hpu
+ context = context.to("hpu")
+ generation_kwargs["attention_mask"] = generation_kwargs["attention_mask"].to("hpu")
+ return self.model.generate(
+ input_ids=context,
+ max_new_tokens=max_gen_toks,
+ stopping_criteria=stopping_criteria,
+ pad_token_id=self.tokenizer.pad_token_id,
+ use_cache=True,
+ hpu_graphs=self.hpu_graphs,
+ lazy_mode=self.use_lazy_mode,
+ **generation_kwargs,
+ )
+
def get_model_info(self) -> dict:
"""
Patched method to get Hugging Face model information for experiment reproducibility.
diff --git a/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization.json b/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization.json
new file mode 100644
index 0000000000..62d9150d63
--- /dev/null
+++ b/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization.json
@@ -0,0 +1,13 @@
+{
+ "mode": "QUANTIZE",
+ "scale_method": "ACT_MAXABS_PCS_POW2_WEIGHT_MAXABS_PTS_POW2_HW",
+ "scale_format": "CONST",
+ "allowlist": {
+ "types": [],
+ "names": [
+ "mlp"
+ ]
+ },
+ "dynamic_quantization": "True",
+ "dump_stats_path": "./hqt_output/measure"
+}
diff --git a/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization_pts.json b/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization_pts.json
new file mode 100644
index 0000000000..69f41da153
--- /dev/null
+++ b/examples/text-generation/quantization_config/maxabs_quant_dynamic_quantization_pts.json
@@ -0,0 +1,13 @@
+{
+ "mode": "QUANTIZE",
+ "scale_method": "maxabs_pow2",
+ "scale_format": "CONST",
+ "allowlist": {
+ "types": [],
+ "names": [
+ "mlp"
+ ]
+ },
+ "dynamic_quantization": "True",
+ "dump_stats_path": "./hqt_output/measure"
+}
\ No newline at end of file
diff --git a/examples/text-generation/quantization_config/maxabs_quant_qdq.json b/examples/text-generation/quantization_config/maxabs_quant_qdq.json
new file mode 100644
index 0000000000..7b87c0d8d8
--- /dev/null
+++ b/examples/text-generation/quantization_config/maxabs_quant_qdq.json
@@ -0,0 +1,9 @@
+{
+ "method": "HOOKS",
+ "mode": "QUANTIZE",
+ "observer": "maxabs",
+ "scale_method": "maxabs_hw",
+ "scale_format": "SCALAR",
+ "dump_stats_path": "./hqt_output/measure",
+ "use_qdq": "True"
+}
\ No newline at end of file
diff --git a/examples/text-generation/requirements.txt b/examples/text-generation/requirements.txt
index 44aebd041a..3d800cc73b 100644
--- a/examples/text-generation/requirements.txt
+++ b/examples/text-generation/requirements.txt
@@ -1,5 +1,5 @@
-datasets
-peft
+datasets == 3.6.0
+peft == 0.11.1
sentencepiece
tiktoken
-blobfile
\ No newline at end of file
+blobfile
diff --git a/examples/text-generation/requirements_evaluation.txt b/examples/text-generation/requirements_evaluation.txt
new file mode 100644
index 0000000000..596d3f8463
--- /dev/null
+++ b/examples/text-generation/requirements_evaluation.txt
@@ -0,0 +1,5 @@
+evaluate == 0.4.3
+rouge_score == 0.1.2
+pandas <= 2.2.2
+sentencepiece
+nltk==3.8.1
diff --git a/examples/text-generation/requirements_lm_eval.txt b/examples/text-generation/requirements_lm_eval.txt
index 3f1a08bcc4..de1bb95acd 100644
--- a/examples/text-generation/requirements_lm_eval.txt
+++ b/examples/text-generation/requirements_lm_eval.txt
@@ -1,5 +1,11 @@
-lm-eval==0.4.7
-datasets==3.6.0
+lm-eval==0.4.9.1
+datasets == 3.6.0
+evaluate == 0.4.3
+rouge_score == 0.1.2
+accelerate
+pandas <= 2.2.2
+sentencepiece <= 0.2.0
+langdetect <= 1.0.9
+immutabledict <= 4.2.1
tiktoken
blobfile
-sentencepiece
\ No newline at end of file
diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py
old mode 100755
new mode 100644
index a0da20b1f5..85a9597879
--- a/examples/text-generation/run_generation.py
+++ b/examples/text-generation/run_generation.py
@@ -278,17 +278,26 @@ def setup_parser(parser):
)
parser.add_argument(
"--use_flash_attention",
- action="store_true",
+ nargs="?",
+ const=True,
+ default=False,
+ action=SetTrueOrFalseOrNone,
help="Whether to enable Habana Flash Attention, provided that the model supports it.",
)
parser.add_argument(
"--flash_attention_recompute",
- action="store_true",
+ nargs="?",
+ const=True,
+ default=False,
+ action=SetTrueOrFalseOrNone,
help="Whether to enable Habana Flash Attention in recompute mode on first token generation. This gives an opportunity of splitting graph internally which helps reduce memory consumption.",
)
parser.add_argument(
"--flash_attention_causal_mask",
- action="store_true",
+ nargs="?",
+ const=True,
+ default=False,
+ action=SetTrueOrFalseOrNone,
help="Whether to enable Habana Flash Attention in causal mode on first token generation.",
)
parser.add_argument(
@@ -393,6 +402,11 @@ def setup_parser(parser):
action="store_true",
help="Load an AutoAWQ quantized checkpoint using AutoAWQ.",
)
+ quant_parser_group.add_argument(
+ "--quantize_with_bnb",
+ action="store_true",
+ help="Quantize model to NF4 using BnB and then use NF4 weights for text-generation",
+ )
quant_parser_group.add_argument(
"--disk_offload",
action="store_true",
@@ -516,7 +530,7 @@ def main():
per_sequence_profiler = disabled_profiler
per_token_profiler = active_profiler
- if args.dataset_name == "mlcommons":
+ if args.dataset_name == "openorca" or args.dataset_name == "mlcommons":
# Benchmark over the prompts below
def get_ds(args):
ds = pd.read_pickle(args.mlcommons_dataset)
@@ -546,6 +560,7 @@ def get_input(ds, batch_size):
def generate(input_tokens, size=None, reduce_recompile=False, disable_profiling=False):
"""Generates sequences from the input sentences and returns them."""
+ profiler = disabled_profiler if disable_profiling else per_token_profiler
timer = HabanaGenerationTime()
timer.start()
@@ -565,6 +580,7 @@ def generate(input_tokens, size=None, reduce_recompile=False, disable_profiling=
lazy_mode=use_lazy_mode,
hpu_graphs=args.use_hpu_graphs,
ignore_eos=args.ignore_eos,
+ profiler=profiler,
).cpu()
outputs = outputs.tolist()
for i in range(len(outputs)):
@@ -612,6 +628,7 @@ def rounder(x):
# Benchmark over n_iterations iterations
N = len(input_sentences)
+ per_sequence_profiler.start()
if dyn_prompt_lens is None:
for i in range(args.n_iterations):
results = []
@@ -621,6 +638,7 @@ def rounder(x):
results.extend(generated)
print(f"Generating batch {b}/{N}")
b += 1
+ per_sequence_profiler.step()
else:
repeated_prompt_len = cycle(dyn_prompt_lens)
for i in range(args.n_iterations):
@@ -630,8 +648,10 @@ def rounder(x):
for sentence in input_sentences:
generated = generate(sentence, prompt_len, args.reduce_recompile)
results.extend(generated)
+ per_sequence_profiler.step()
timer.step()
duration = timer.last_duration
+ per_sequence_profiler.stop()
total_new_tokens_generated = args.n_iterations * args.batch_size * args.max_new_tokens
throughput = total_new_tokens_generated / duration
@@ -859,24 +879,24 @@ def rounder(x):
if dyn_prompt_lens is None:
for i in range(args.n_iterations):
generated, first_token_time, rest_token_time, e2e_latency = generate(None, args.reduce_recompile)
+ per_sequence_profiler.step()
first_token_latencies.append(first_token_time)
rest_token_latencies.append(rest_token_time)
e2e_latencies.append(e2e_latency)
- per_sequence_profiler.step()
else:
repeated_prompt_len = cycle(dyn_prompt_lens)
for i in range(args.n_iterations):
prompt_len = next(repeated_prompt_len)
print("Generating for shape,", prompt_len)
generated, first_token_time, rest_token_time, e2e_latency = generate(prompt_len, args.reduce_recompile)
+ per_sequence_profiler.step()
first_token_latencies.append(first_token_time)
rest_token_latencies.append(rest_token_time)
e2e_latencies.append(e2e_latency)
- per_sequence_profiler.step()
timer.step()
+ per_sequence_profiler.stop()
logger.info("Finished running generate")
duration = timer.last_duration
- per_sequence_profiler.stop()
total_new_tokens_generated = args.n_iterations * args.batch_size * args.max_new_tokens
throughput = total_new_tokens_generated / duration
# Calculate average latencies
@@ -1033,7 +1053,7 @@ def generate_dataset(batch, disable_profiling=False):
timer.start()
for i, batch in enumerate(dataloader):
timer.step()
- generate_dataset(batch)
+ generate_dataset(batch, disable_profiling=True)
timer.step()
duration = timer.last_duration
# The first three iterations take longer because of graph compilation
@@ -1042,15 +1062,14 @@ def generate_dataset(batch, disable_profiling=False):
torch_hpu.synchronize()
timer.step()
compilation_duration = timer.last_duration
+
total_new_tokens_generated = 0
duration = 0
separator = "-" * 50
logger.info("Running generate dataset...")
-
timer = HabanaGenerationTime()
timer.start()
per_sequence_profiler.start()
-
for i, batch in enumerate(dataloader):
timer.step()
prompt, outputs = generate_dataset(batch)
@@ -1067,8 +1086,8 @@ def generate_dataset(batch, disable_profiling=False):
if args.run_partial_dataset and args.n_iterations == i + 1:
break
per_sequence_profiler.step()
- timer.step()
per_sequence_profiler.stop()
+ timer.step()
throughput = total_new_tokens_generated / duration
# Print Stats
diff --git a/examples/text-generation/run_lm_eval.py b/examples/text-generation/run_lm_eval.py
index aeac93af00..20ea0a9cd5 100644
--- a/examples/text-generation/run_lm_eval.py
+++ b/examples/text-generation/run_lm_eval.py
@@ -22,6 +22,8 @@
import logging
import multiprocessing as mp
import os
+from pathlib import Path
+from typing import Union
import psutil
@@ -53,6 +55,20 @@ def LimitedSpawnPool(_):
mp.Pool = LimitedSpawnPool
+def try_parse_json(value: str) -> Union[str, dict, None]:
+ """
+ From https://github.com/EleutherAI/lm-evaluation-harness/blob/v0.4.9.1/lm_eval/__main__.py
+ """
+ if value is None:
+ return None
+ try:
+ return json.loads(value)
+ except json.JSONDecodeError:
+ if "{" in value:
+ raise argparse.ArgumentTypeError(f"Invalid JSON: {value}. Hint: Use double quotes for JSON strings.")
+ return value
+
+
def setup_lm_eval_parser():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Evaluation script for HPU"
@@ -62,7 +78,7 @@ def setup_lm_eval_parser():
type=int,
nargs="+",
help="Input length buckets to use with static_shapes",
- default=[16, 32, 64, 128, 189, 284, 384, 985],
+ default=[16, 32, 64, 128, 189, 284, 384],
)
parser.add_argument(
@@ -75,7 +91,14 @@ def setup_lm_eval_parser():
help="Tasks to run",
default=["hellaswag", "lambada_openai", "piqa", "winogrande"],
)
- parser.add_argument("--limit_iters", type=int, help="limit examples to run that many iterations", default=None)
+ parser.add_argument(
+ "--limit",
+ "-L",
+ type=float,
+ default=None,
+ metavar="N|0 None:
args = setup_lm_eval_parser()
model, _, tokenizer, generation_config = initialize_model(args, logger)
+ # Delayed imports: external modules are imported here to ensure that
+ # environment variables and runtime configurations are properly initialized
+ # before loading modules that depend on them.
import torch
from lm_eval import evaluator, utils
from model_adapter import HabanaModelAdapter
- with torch.no_grad():
- lm = HabanaModelAdapter(tokenizer, model, args, generation_config)
-
from optimum.habana.utils import HabanaGenerationTime, get_hpu_memory_stats
+    max_length = None
+    metadata = None
+    if args.metadata:  # may be a dict (parsed JSON) or a raw "key=value,..." string; normalise to a dict before reading keys
+        metadata = args.metadata if isinstance(args.metadata, dict) else utils.simple_parse_args_string(args.metadata)
+        max_length = metadata.get("max_length")  # read the normalised dict; a raw string in args.metadata has no .get()
+
+ if args.fewshot_as_multiturn and args.apply_chat_template is False:
+ raise ValueError(
+ "When `fewshot_as_multiturn` is selected, `apply_chat_template` must be set (either to `True` or to the chosen template name)."
+ )
+ if args.samples:
+ assert args.limit is None, "If --samples is not None, then --limit must be None."
+ if (samples := Path(args.samples)).is_file():
+ args.samples = json.loads(samples.read_text())
+ else:
+ args.samples = json.loads(args.samples)
+
+ with torch.no_grad():
+ lm = HabanaModelAdapter(tokenizer, model, args, generation_config, max_length=max_length)
+
with HabanaGenerationTime() as timer:
with torch.no_grad():
- log_samples = args.log_samples
- results = evaluator.simple_evaluate(lm, tasks=args.tasks, limit=args.limit_iters, log_samples=log_samples)
+ results = evaluator.simple_evaluate(
+ lm,
+ tasks=args.tasks,
+ limit=args.limit,
+ samples=args.samples,
+ log_samples=args.log_samples,
+ num_fewshot=args.num_fewshot,
+ fewshot_as_multiturn=args.fewshot_as_multiturn,
+ gen_kwargs=args.gen_kwargs,
+ system_instruction=args.system_instruction,
+ apply_chat_template=args.apply_chat_template,
+ metadata=metadata,
+ confirm_run_unsafe_code=args.confirm_run_unsafe_code,
+ )
if args.device == "hpu":
import habana_frameworks.torch.hpu as torch_hpu
diff --git a/examples/text-generation/utils.py b/examples/text-generation/utils.py
index 2a6454be75..a82a475484 100644
--- a/examples/text-generation/utils.py
+++ b/examples/text-generation/utils.py
@@ -138,7 +138,7 @@ def setup_env(args):
from optimum.habana.utils import check_optimum_habana_min_version
- check_optimum_habana_min_version("1.18.0.dev0")
+ check_optimum_habana_min_version("1.19.0.dev0")
# Tweak generation so that it runs faster on Gaudi
from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi
@@ -290,6 +290,22 @@ def setup_model(args, model_dtype, model_kwargs, logger):
model = AutoModelForCausalLM.from_pretrained(
args.model_name_or_path, torch_dtype=model_dtype, quantization_config=quantization_config, **model_kwargs
)
+ elif args.quantize_with_bnb:
+ from transformers import BitsAndBytesConfig
+
+ nf4_config = BitsAndBytesConfig(
+ load_in_4bit=True,
+ bnb_4bit_use_double_quant=True,
+ bnb_4bit_quant_type="nf4",
+ bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+ model = AutoModelForCausalLM.from_pretrained(
+ args.model_name_or_path,
+ quantization_config=nf4_config,
+ device_map={"": "hpu"},
+ torch_dtype=model_dtype,
+ **model_kwargs,
+ )
elif args.load_quantized_model_with_inc:
# TODO: This will be removed in v1.20 Synapse release
# Override neural_compressor split_rank_state_dict for loading neural_magic models on multi-cards.
diff --git a/examples/text-to-speech/requirements.txt b/examples/text-to-speech/requirements.txt
index 01d3da67aa..92217bf900 100644
--- a/examples/text-to-speech/requirements.txt
+++ b/examples/text-to-speech/requirements.txt
@@ -1,3 +1,3 @@
-datasets
-soundfile
+datasets == 3.6.0
+soundfile == 0.12.1
sentencepiece
diff --git a/examples/translation/requirements.txt b/examples/translation/requirements.txt
index ff9ede1567..458e933076 100644
--- a/examples/translation/requirements.txt
+++ b/examples/translation/requirements.txt
@@ -1,7 +1,7 @@
-datasets >= 2.4.0
+datasets == 3.6.0
sentencepiece != 0.1.92
-protobuf
-sacrebleu >= 1.4.12
-py7zr
+protobuf == 3.20.3
+sacrebleu >= 1.4.12, <= 2.4.2
+py7zr == 0.21.0
torch >= 1.3
-evaluate
+evaluate == 0.4.3
diff --git a/examples/translation/run_translation.py b/examples/translation/run_translation.py
index c1d8a07d1d..a1892c3371 100644
--- a/examples/translation/run_translation.py
+++ b/examples/translation/run_translation.py
@@ -63,7 +63,7 @@ def check_optimum_habana_min_version(*a, **b):
# Will error if the minimal version of Transformers and Optimum Habana are not installed. Remove at your own risks.
check_min_version("4.51.0")
-check_optimum_habana_min_version("1.18.0.dev0")
+check_optimum_habana_min_version("1.19.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
diff --git a/examples/trl/requirements.txt b/examples/trl/requirements.txt
index 502a2d99a8..17e8b74935 100644
--- a/examples/trl/requirements.txt
+++ b/examples/trl/requirements.txt
@@ -1,6 +1,7 @@
trl == 0.9.6
peft == 0.15.0
datasets == 2.19.2
-tyro
-evaluate
+wandb == 0.17.1
+tyro == 0.8.4
+evaluate == 0.4.3
scikit-learn == 1.5.2
diff --git a/examples/video-comprehension/run_example.py b/examples/video-comprehension/run_example.py
index b53679fb0b..5868bea3e8 100644
--- a/examples/video-comprehension/run_example.py
+++ b/examples/video-comprehension/run_example.py
@@ -24,10 +24,10 @@
import numpy as np
import torch
from huggingface_hub import hf_hub_download
+from transformers import VideoLlavaProcessor
from optimum.habana.transformers.modeling_utils import (
GaudiVideoLlavaForConditionalGeneration,
- GaudiVideoLlavaProcessor,
adapt_transformers_to_gaudi,
)
@@ -168,7 +168,7 @@ def main():
model = wrap_in_hpu_graph(model)
- processor = GaudiVideoLlavaProcessor.from_pretrained(args.model_name_or_path)
+ processor = VideoLlavaProcessor.from_pretrained(args.model_name_or_path)
processor.tokenizer.padding_side = "left"
inputs = processor(text=prompts, videos=video_clips, return_tensors="pt")
inputs = inputs.to(device)
diff --git a/optimum/habana/quantizers/bitsandbytes.py b/optimum/habana/quantizers/bitsandbytes.py
deleted file mode 100644
index ee56b55d53..0000000000
--- a/optimum/habana/quantizers/bitsandbytes.py
+++ /dev/null
@@ -1,265 +0,0 @@
-from functools import lru_cache
-from typing import Any, Dict, List, Optional
-
-from transformers.modeling_utils import PreTrainedModel
-from transformers.pytorch_utils import Conv1D
-from transformers.quantizers.quantizers_utils import get_module_from_name
-from transformers.utils import (
- ACCELERATE_MIN_VERSION,
- get_available_devices,
- is_accelerate_available,
- is_bitsandbytes_multi_backend_available,
- is_ipex_available,
- is_torch_available,
- logging,
-)
-from transformers.utils.import_utils import _is_package_available
-
-
-if is_torch_available():
- import torch
-
-_bitsandbytes_available = _is_package_available("bitsandbytes")
-logger = logging.get_logger(__name__)
-
-
-def gaudi_bitsandbytesconfig_post_init(self):
- r"""
- Safety checker that arguments are correct - also replaces some NoneType arguments with their default values.
- Copied from https://github.com/huggingface/transformers/blob/53fad641cfdb5105e2470bcf3ef17ea8e25cc300/src/transformers/utils/quantization_config.py#L430
- Only difference is removed check on bitsandbytes version
- """
- if not isinstance(self.load_in_4bit, bool):
- raise TypeError("load_in_4bit must be a boolean")
-
- if not isinstance(self.load_in_8bit, bool):
- raise TypeError("load_in_8bit must be a boolean")
-
- if not isinstance(self.llm_int8_threshold, float):
- raise TypeError("llm_int8_threshold must be a float")
-
- if self.llm_int8_skip_modules is not None and not isinstance(self.llm_int8_skip_modules, list):
- raise TypeError("llm_int8_skip_modules must be a list of strings")
- if not isinstance(self.llm_int8_enable_fp32_cpu_offload, bool):
- raise TypeError("llm_int8_enable_fp32_cpu_offload must be a boolean")
-
- if not isinstance(self.llm_int8_has_fp16_weight, bool):
- raise TypeError("llm_int8_has_fp16_weight must be a boolean")
-
- if self.bnb_4bit_compute_dtype is not None and not isinstance(self.bnb_4bit_compute_dtype, torch.dtype):
- raise TypeError("bnb_4bit_compute_dtype must be torch.dtype")
-
- if not isinstance(self.bnb_4bit_quant_type, str):
- raise TypeError("bnb_4bit_quant_type must be a string")
-
- if not isinstance(self.bnb_4bit_use_double_quant, bool):
- raise TypeError("bnb_4bit_use_double_quant must be a boolean")
-
-
-@lru_cache()
-def gaudi_is_bitsandbytes_available():
- """
- Copied from https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/utils/import_utils.py#L871
- Only difference is that CUDA related checks are removed.
- """
- if not is_torch_available() or not _bitsandbytes_available:
- return False
-
- # Newer versions of `bitsandbytes` can be imported on systems without CUDA.
- return True
-
-
-def gaudi_validate_bnb_backend_availability(raise_exception=False):
- """
- Validates if the available devices are supported by bitsandbytes, optionally raising an exception if not.
- Copied from https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/integrations/bitsandbytes.py#L545
- Only difference is that CUDA related functions calls are deleted.
- """
- if is_bitsandbytes_multi_backend_available():
- return _gaudi_validate_bnb_multi_backend_availability(raise_exception)
-
-
-def _gaudi_validate_bnb_multi_backend_availability(raise_exception):
- """
- Copied https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/integrations/bitsandbytes.py#L484
- Only difference is addition of check for HPU
- """
- import bitsandbytes as bnb
-
- bnb_supported_devices = getattr(bnb, "supported_torch_devices", set())
- available_devices = get_available_devices()
-
- if "hpu" in bnb_supported_devices:
- logger.debug("Multi-backend validation successful.")
- return True
-
- if available_devices == {"cpu"} and not is_ipex_available():
- from importlib.util import find_spec
-
- if find_spec("intel_extension_for_pytorch"):
- logger.warning(
- "You have Intel IPEX installed but if you're intending to use it for CPU, it might not have the right version. Be sure to double check that your PyTorch and IPEX installs are compatible."
- )
-
- available_devices.discard("cpu") # Only Intel CPU is supported by BNB at the moment
-
- if not available_devices.intersection(bnb_supported_devices):
- if raise_exception:
- bnb_supported_devices_with_info = set( # noqa: C401
- '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)'
- if device == "cpu"
- else device
- for device in bnb_supported_devices
- )
- err_msg = (
- f"None of the available devices `available_devices = {available_devices or None}` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {bnb_supported_devices_with_info}`. "
- "Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend"
- )
-
- logger.error(err_msg)
- raise RuntimeError(err_msg)
-
- logger.warning("No supported devices found for bitsandbytes multi-backend.")
- return False
-
- logger.debug("Multi-backend validation successful.")
- return True
-
-
-def gaudi_validate_environment(self, *args, **kwargs):
- """
- Copied from https://github.com/huggingface/transformers/blob/5523e38b553ff6c46b04d2376870fcd842feeecc/src/transformers/quantizers/quantizer_bnb_4bit.py#L68
- Only difference is deletion of bitsandbytes version checks
- """
- if not is_accelerate_available():
- raise ImportError(
- f"Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`"
- )
- if not gaudi_is_bitsandbytes_available():
- raise ImportError(
- "Using `bitsandbytes` 4-bit quantization requires the latest version of bitsandbytes: `pip install -U bitsandbytes`"
- )
-
- bnb_multibackend_is_enabled = is_bitsandbytes_multi_backend_available()
- gaudi_validate_bnb_backend_availability(raise_exception=True)
-
- if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
- raise ValueError(
- "Converting into 4-bit or 8-bit weights from tf/flax weights is currently not supported, please make"
- " sure the weights are in PyTorch format."
- )
-
- device_map = kwargs.get("device_map", None)
- if (
- device_map is not None
- and isinstance(device_map, dict)
- and not self.quantization_config.llm_int8_enable_fp32_cpu_offload
- ):
- device_map_without_lm_head = {
- key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert
- }
- if set(device_map.values()) == {"cpu"} and bnb_multibackend_is_enabled:
- pass
- elif "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
- raise ValueError(
- "Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the "
- "quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules "
- "in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to "
- "`from_pretrained`. Check "
- "https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu "
- "for more details. "
- )
-
-
-def gaudi_create_quantized_param(
- self,
- model: "PreTrainedModel",
- param_value: "torch.Tensor",
- param_name: str,
- target_device: "torch.device",
- state_dict: Dict[str, Any],
- unexpected_keys: Optional[List[str]] = None,
-):
- """
- Copied from https://github.com/huggingface/transformers/blob/62c60a30181a65e1a3a7f19c3055a240a6a21335/src/transformers/quantizers/quantizer_bnb_4bit.py#L138
- only diiference is addition of HPU device
- """
- import bitsandbytes as bnb
-
- module, tensor_name = get_module_from_name(model, param_name)
-
- if tensor_name not in module._parameters:
- raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.")
-
- old_value = getattr(module, tensor_name)
-
- if tensor_name == "bias":
- if param_value is None:
- new_value = old_value.to(target_device)
- else:
- new_value = param_value.to(target_device)
-
- new_value = torch.nn.Parameter(new_value, requires_grad=old_value.requires_grad)
- module._parameters[tensor_name] = new_value
- return
-
- if not isinstance(module._parameters[tensor_name], bnb.nn.Params4bit):
- raise ValueError("this function only loads `Linear4bit components`")
- if (
- old_value.device == torch.device("meta")
- and target_device not in ["meta", torch.device("meta")]
- and param_value is None
- ):
- raise ValueError(f"{tensor_name} is on the meta device, we need a `value` to put in on {target_device}.")
-
- # construct `new_value` for the module._parameters[tensor_name]:
- if self.pre_quantized:
- # 4bit loading. Collecting components for restoring quantized weight
- # This can be expanded to make a universal call for any quantized weight loading
-
- if not self.is_serializable:
- raise ValueError(
- "Detected int4 weights but the version of bitsandbytes is not compatible with int4 serialization. "
- "Make sure to download the latest `bitsandbytes` version. `pip install --upgrade bitsandbytes`."
- )
-
- if (param_name + ".quant_state.bitsandbytes__fp4" not in state_dict) and (
- param_name + ".quant_state.bitsandbytes__nf4" not in state_dict
- ):
- raise ValueError(
- f"Supplied state dict for {param_name} does not contain `bitsandbytes__*` and possibly other `quantized_stats` components."
- )
-
- quantized_stats = {}
- for k, v in state_dict.items():
- if param_name + "." in k:
- quantized_stats[k] = v
- if unexpected_keys is not None and k in unexpected_keys:
- unexpected_keys.remove(k)
-
- param_kwargs = {}
- if self.is_bnb_supports_quant_storage_module:
- param_kwargs["module"] = module
-
- new_value = bnb.nn.Params4bit.from_prequantized(
- data=param_value,
- quantized_stats=quantized_stats,
- requires_grad=False,
- device=target_device,
- **param_kwargs,
- )
- else:
- if target_device == "hpu":
- new_value = param_value.to("hpu")
- else:
- new_value = param_value.to("cpu")
-
- # Support models using `Conv1D` in place of `nn.Linear` (e.g. openai-community/gpt2) by transposing the weight matrix prior to quantization.
- # Since weights are saved in the correct "orientation", we skip transposing when loading.
- if issubclass(module.source_cls, Conv1D):
- new_value = new_value.T
-
- kwargs = old_value.__dict__
- new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(target_device)
-
- module._parameters[tensor_name] = new_value
diff --git a/optimum/habana/transformers/generation/utils.py b/optimum/habana/transformers/generation/utils.py
index a8b1858e99..e8984c05b4 100755
--- a/optimum/habana/transformers/generation/utils.py
+++ b/optimum/habana/transformers/generation/utils.py
@@ -134,6 +134,7 @@
"qwen2_vl",
"qwen3",
"qwen3_moe",
+ "arctic",
]
# Initial generated token index is set to 1 to accomodate SOS (start of string) token.
@@ -2473,6 +2474,7 @@ def _contrastive_search(
do_padding = (
key_to_check is not None
and outputs.past_key_values[0][0].shape[2] == model_inputs[key_to_check].shape[1]
+ and generation_config.max_new_tokens > 1
)
if do_padding:
@@ -2837,6 +2839,7 @@ def _sample(
do_padding = (
key_to_check is not None
and outputs.past_key_values[0][0].shape[2] == model_inputs[key_to_check].shape[1]
+ and generation_config.max_new_tokens > 1
)
if do_padding:
diff --git a/optimum/habana/transformers/modeling_utils.py b/optimum/habana/transformers/modeling_utils.py
index 66802f7135..7b86a35b81 100644
--- a/optimum/habana/transformers/modeling_utils.py
+++ b/optimum/habana/transformers/modeling_utils.py
@@ -18,13 +18,6 @@
import transformers
import transformers.utils.fx
-from ..quantizers.bitsandbytes import (
- gaudi_bitsandbytesconfig_post_init,
- gaudi_create_quantized_param,
- gaudi_is_bitsandbytes_available,
- gaudi_validate_bnb_backend_availability,
- gaudi_validate_environment,
-)
from .generation import (
GaudiGenerationConfig,
GaudiGenerationMixin,
@@ -43,6 +36,9 @@
from .loss import gaudi_RTDetrHungarianMatcher_forward
from .models import (
GAUDI_WHISPER_ATTENTION_CLASSES,
+ ArcticConfig,
+ ArcticForCausalLM,
+ ArcticTokenizer,
BaichuanConfig,
BaichuanForCausalLM,
BaichuanTokenizer,
@@ -192,7 +188,6 @@
GaudiStarcoder2ForCausalLM,
GaudiStarcoder2Model,
GaudiVideoLlavaForConditionalGeneration,
- GaudiVideoLlavaProcessor,
GaudiVisionSdpaAttention,
GaudiWav2Vec2SdpaAttention,
GaudiWhisperDecoder,
@@ -323,14 +318,6 @@ def adapt_transformers_to_gaudi():
for Gaudi.
"""
- transformers.utils.quantization_config.BitsAndBytesConfig.post_init = gaudi_bitsandbytesconfig_post_init
- transformers.utils.import_utils.is_bitsandbytes_available = gaudi_is_bitsandbytes_available
- transformers.utils.is_bitsandbytes_available = gaudi_is_bitsandbytes_available
- transformers.quantizers.quantizer_bnb_4bit.is_bitsandbytes_available = gaudi_is_bitsandbytes_available
- transformers.integrations.bitsandbytes.validate_bnb_backend_availability = gaudi_validate_bnb_backend_availability
- transformers.quantizers.quantizer_bnb_4bit.Bnb4BitHfQuantizer.validate_environment = gaudi_validate_environment
- transformers.quantizers.quantizer_bnb_4bit.Bnb4BitHfQuantizer.create_quantized_param = gaudi_create_quantized_param
-
# models that support symbolic tracing should be added to this list
models_with_tracing_support = []
@@ -776,7 +763,6 @@ def adapt_transformers_to_gaudi():
transformers.models.video_llava.modeling_video_llava.VideoLlavaForConditionalGeneration = (
GaudiVideoLlavaForConditionalGeneration
)
- transformers.models.video_llava.processing_video_llava.VideoLlavaProcessor = GaudiVideoLlavaProcessor
# Optimization for Whisper on Gaudi
transformers.models.whisper.modeling_whisper.WhisperSdpaAttention = GaudiWhisperSdpaAttention
@@ -874,3 +860,7 @@ def adapt_transformers_to_gaudi():
# Optimization for RT-DETR model on Gaudi
transformers.loss.loss_rt_detr.RTDetrHungarianMatcher.forward = gaudi_RTDetrHungarianMatcher_forward
+
+ transformers.AutoConfig.register("arctic", ArcticConfig)
+ transformers.AutoModelForCausalLM.register(ArcticConfig, ArcticForCausalLM)
+ transformers.AutoTokenizer.register(ArcticConfig, ArcticTokenizer)
diff --git a/optimum/habana/transformers/models/__init__.py b/optimum/habana/transformers/models/__init__.py
index 81ebae6e51..9b28749b42 100644
--- a/optimum/habana/transformers/models/__init__.py
+++ b/optimum/habana/transformers/models/__init__.py
@@ -313,6 +313,7 @@
GaudiSiglipVisionModel,
GaudiSiglipVisionTransformer,
)
+from .snowflake import ArcticConfig, ArcticForCausalLM, ArcticTokenizer
from .speecht5 import (
gaudi_generate_speech,
gaudi_SpeechT5Attention_forward,
@@ -341,7 +342,7 @@
gaudi_T5Stack_forward,
)
from .table_transformer import gaudi_table_transformer_conv_encoder_forward
-from .video_llava import GaudiVideoLlavaForConditionalGeneration, GaudiVideoLlavaProcessor
+from .video_llava import GaudiVideoLlavaForConditionalGeneration
from .vision_encoder_decoder import (
gaudi_VisionEncoderDecoderModel_prepare_inputs_for_generation,
)
diff --git a/optimum/habana/transformers/models/llama/modeling_llama.py b/optimum/habana/transformers/models/llama/modeling_llama.py
index 75d8097cc8..4b4b903bfd 100755
--- a/optimum/habana/transformers/models/llama/modeling_llama.py
+++ b/optimum/habana/transformers/models/llama/modeling_llama.py
@@ -522,7 +522,7 @@ def get_k_proj_weight_dtype(self):
if hasattr(self.k_proj, "qweight"):
return self.k_proj.scales.dtype
elif hasattr(self.k_proj, "use_qdq") and self.k_proj.use_qdq:
- return self.k_proj.dequant_weights.hp_dtype
+ return self.k_proj.weight.dtype
elif isinstance(self.k_cache, KVCache) and "float8" in str(self.k_proj.weight.dtype):
return self.k_proj.hp_dtype
return self.k_proj.weight.dtype
@@ -1000,7 +1000,6 @@ def forward(
valid_sequence_lengths=sub_valid_sequence_lengths[i],
cache_idx=cache_idx,
num_virtual_tokens=num_virtual_tokens,
- **kwargs,
)
self.self_attn.attention_all_reduce(split_hidden_states[i])
if output_attentions:
@@ -1044,7 +1043,6 @@ def forward(
valid_sequence_lengths=valid_sequence_lengths,
cache_idx=cache_idx,
num_virtual_tokens=num_virtual_tokens,
- **kwargs,
)
self.self_attn.attention_all_reduce(hidden_states)
hidden_states, residual = self.post_attn_pre_mlp(hidden_states, residual)
diff --git a/optimum/habana/transformers/models/snowflake/__init__.py b/optimum/habana/transformers/models/snowflake/__init__.py
new file mode 100644
index 0000000000..a907bf0e56
--- /dev/null
+++ b/optimum/habana/transformers/models/snowflake/__init__.py
@@ -0,0 +1,3 @@
+from .configuration_arctic import ArcticConfig
+from .modeling_arctic import ArcticForCausalLM
+from .tokenization_arctic import ArcticTokenizer
diff --git a/optimum/habana/transformers/models/snowflake/configuration_arctic.py b/optimum/habana/transformers/models/snowflake/configuration_arctic.py
new file mode 100644
index 0000000000..bf81f4942c
--- /dev/null
+++ b/optimum/habana/transformers/models/snowflake/configuration_arctic.py
@@ -0,0 +1,216 @@
+# Copyright 2023 Snowflake AI and the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Arctic model configuration. Copied from https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/be318cae5aba5291208f27d30991a5150500887d."""
+
+from dataclasses import asdict, dataclass
+from typing import Any, Dict
+
+from transformers.configuration_utils import PretrainedConfig
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+ARCTIC_PRETRAINED_CONFIG_ARCHIVE_MAP = {
+ "arctic": "https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/main/config.json",
+}
+
+
+@dataclass
+class ArcticLoraConfig:
+ lora_r: int = 64
+ lora_alpha: float = 16
+ shard_base_weights: bool = False
+
+
+@dataclass
+class ArcticQuantizationConfig:
+ q_bits: int = 8
+ rounding: str = "nearest"
+ mantissa_bits: int = 3
+ group_size: int = 512
+
+
+class ArcticConfig(PretrainedConfig):
+ r"""
+ This is the configuration class to store the configuration of a [`ArcticModel`]. It is used to instantiate an
+ Arctic model according to the specified arguments, defining the model architecture. Instantiating a configuration
+ with the defaults will yield a similar configuration to that of the #TODO(rsamdani): add what model has the default config..
+
+
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+ documentation from [`PretrainedConfig`] for more information.
+
+
+ Args:
+ vocab_size (`int`, *optional*, defaults to 32000):
+ Vocabulary size of the Arctic model. Defines the number of different tokens that can be represented by the
+ `inputs_ids` passed when calling [`ArcticModel`]
+ hidden_size (`int`, *optional*, defaults to 4096):
+ Dimension of the hidden representations.
+ intermediate_size (`int`, *optional*, defaults to 14336):
+ Dimension of the MLP representations.
+ num_hidden_layers (`int`, *optional*, defaults to 32):
+ Number of hidden layers in the Transformer encoder.
+ num_attention_heads (`int`, *optional*, defaults to 32):
+ Number of attention heads for each attention layer in the Transformer encoder.
+        num_key_value_heads (`int`, *optional*):
+            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
+            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
+            `num_key_value_heads=1`, the model will use Multi Query Attention (MQA), otherwise GQA is used. When
+            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
+            by meanpooling all the original heads within that group. For more details check out [this
+            paper](https://arxiv.org/pdf/2305.13245.pdf). If unspecified, defaults to `num_attention_heads`.
+ hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
+ The non-linear activation function (function or string) in the decoder.
+        max_position_embeddings (`int`, *optional*, defaults to 4096):
+            The maximum sequence length that this model might ever be used with. Arctic's sliding window attention
+            allows sequences of up to 4096*32 tokens.
+ initializer_range (`float`, *optional*, defaults to 0.02):
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+ rms_norm_eps (`float`, *optional*, defaults to 1e-05):
+ The epsilon used by the rms normalization layers.
+ use_cache (`bool`, *optional*, defaults to `True`):
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
+ relevant if `config.is_decoder=True`.
+ pad_token_id (`int`, *optional*):
+ The id of the padding token.
+ bos_token_id (`int`, *optional*, defaults to 1):
+ The id of the "beginning-of-sequence" token.
+ eos_token_id (`int`, *optional*, defaults to 2):
+ The id of the "end-of-sequence" token.
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`):
+ Whether the model's input and output word embeddings should be tied.
+ rope_theta (`float`, *optional*, defaults to 1000000.0):
+ The base period of the RoPE embeddings.
+        sliding_window (`int`, *optional*):
+            Sliding window attention window size. If not specified, it stays `None`.
+ attention_dropout (`float`, *optional*, defaults to 0.0):
+ The dropout ratio for the attention probabilities.
+        num_experts_per_tok (`int`, *optional*, defaults to 1):
+            The number of experts to route per-token, can be also interpreted as the `top-k` routing
+            parameter.
+ num_local_experts (`int`, *optional*, defaults to 8):
+ Number of experts per Sparse MLP layer.
+ router_aux_loss_coef (`float`, *optional*, defaults to 0.001):
+ The aux loss factor for the total loss.
+
+    ```python
+    >>> from optimum.habana.transformers.models import ArcticConfig, ArcticForCausalLM
+
+    >>> # Initializing an Arctic 7B style configuration TODO(rsamdani): verify which model does the default configuration correspond to.
+    >>> configuration = ArcticConfig()
+
+    >>> # Initializing a model from the Arctic 7B style configuration
+    >>> model = ArcticForCausalLM(configuration)
+
+ >>> # Accessing the model configuration
+ >>> configuration = model.config
+ ```"""
+
+ model_type = "arctic"
+ keys_to_ignore_at_inference = ["past_key_values"]
+
+ def __init__(
+ self,
+ vocab_size=32000,
+ hidden_size=4096,
+ intermediate_size=14336,
+ num_hidden_layers=32,
+ num_attention_heads=32,
+ num_key_value_heads=None,
+ hidden_act="silu",
+ max_position_embeddings=4096,
+ initializer_range=0.02,
+ rms_norm_eps=1e-5,
+ use_cache=True,
+ pad_token_id=None,
+ bos_token_id=1,
+ eos_token_id=2,
+ tie_word_embeddings=False,
+ rope_theta=1e6,
+ sliding_window=None,
+ attention_dropout=0.0,
+ num_experts_per_tok=1,
+ num_local_experts=8,
+ router_aux_loss_coef=0.001,
+ moe_layer_frequency=2,
+ parallel_attn_mlp_res=False,
+ moe_train_capacity_factor=1,
+ moe_eval_capacity_factor=1,
+ enable_expert_tensor_parallelism=False,
+ moe_min_capacity=0,
+ moe_token_dropping=True,
+ quantization=None,
+ **kwargs,
+ ):
+ self.vocab_size = vocab_size
+ self.max_position_embeddings = max_position_embeddings
+ self.hidden_size = hidden_size
+ self.intermediate_size = intermediate_size
+ self.num_hidden_layers = num_hidden_layers
+ self.num_attention_heads = num_attention_heads
+ self.sliding_window = sliding_window
+
+ # for backward compatibility
+ if num_key_value_heads is None:
+ num_key_value_heads = num_attention_heads
+
+ self.num_key_value_heads = num_key_value_heads
+ self.hidden_act = hidden_act
+ self.initializer_range = initializer_range
+ self.rms_norm_eps = rms_norm_eps
+ self.use_cache = use_cache
+ self.rope_theta = rope_theta
+ self.attention_dropout = attention_dropout
+
+ self.num_experts_per_tok = num_experts_per_tok
+ self.num_local_experts = num_local_experts
+ self.router_aux_loss_coef = router_aux_loss_coef
+ self.moe_layer_frequency = moe_layer_frequency
+ self.moe_train_capacity_factor = moe_train_capacity_factor
+ self.moe_eval_capacity_factor = moe_eval_capacity_factor
+ self.enable_expert_tensor_parallelism = enable_expert_tensor_parallelism
+ self.moe_min_capacity = moe_min_capacity
+ self.moe_token_dropping = moe_token_dropping
+ self.parallel_attn_mlp_res = parallel_attn_mlp_res
+ if isinstance(quantization, dict):
+ self.quantization = ArcticQuantizationConfig(**quantization)
+ else:
+ self.quantization = quantization
+
+ super().__init__(
+ pad_token_id=pad_token_id,
+ bos_token_id=bos_token_id,
+ eos_token_id=eos_token_id,
+ tie_word_embeddings=tie_word_embeddings,
+ **kwargs,
+ )
+
+ @classmethod
+ def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "ArcticConfig":
+ result = super().from_dict(config_dict, **kwargs)
+ if isinstance(result, tuple):
+ config = result[0]
+ else:
+ config = result
+ if isinstance(config.quantization, dict):
+ config.quantization = ArcticQuantizationConfig(**config.quantization)
+ return result
+
+ def to_dict(self) -> Dict[str, Any]:
+ ret = super().to_dict()
+ if isinstance(ret["quantization"], ArcticQuantizationConfig):
+ ret["quantization"] = asdict(ret["quantization"])
+ return ret
diff --git a/optimum/habana/transformers/models/snowflake/modeling_arctic.py b/optimum/habana/transformers/models/snowflake/modeling_arctic.py
new file mode 100644
index 0000000000..40e5d9f3d3
--- /dev/null
+++ b/optimum/habana/transformers/models/snowflake/modeling_arctic.py
@@ -0,0 +1,1511 @@
+# coding=utf-8
+# Copyright 2023 Mistral AI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTorch Arctic model. Adapted from https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/be318cae5aba5291208f27d30991a5150500887d.
+
+Changes made:
+- Use HPU FusedRoPE implementation
+- Use HPU FusedRMSNorm implementation
+- Added mark steps
+"""
+
+import math
+import warnings
+from typing import List, Optional, Tuple, Union
+
+import habana_frameworks.torch.core as htcore
+import torch
+import torch.nn.functional as F
+import torch.utils.checkpoint
+from torch import nn
+from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
+from transformers.activations import ACT2FN
+from transformers.cache_utils import Cache
+from transformers.generation import GenerationMixin
+from transformers.integrations.deepspeed import is_deepspeed_available
+from transformers.modeling_attn_mask_utils import (
+ _prepare_4d_causal_attention_mask,
+ _prepare_4d_causal_attention_mask_for_sdpa,
+)
+from transformers.modeling_outputs import (
+ MoeCausalLMOutputWithPast,
+ MoeModelOutputWithPast,
+ SequenceClassifierOutputWithPast,
+)
+from transformers.modeling_utils import PreTrainedModel
+from transformers.utils import (
+ add_start_docstrings,
+ add_start_docstrings_to_model_forward,
+ logging,
+ replace_return_docstrings,
+)
+
+from ..llama.modeling_llama import (
+ GaudiLlamaRotaryEmbedding,
+)
+from ..mixtral.modeling_mixtral import GaudiMixtralAttentionLongSequence
+from ..modeling_all_models import KVCache, apply_customized_rope_module
+from .configuration_arctic import ArcticConfig
+
+
+try:
+ from habana_frameworks.torch.hpex.kernels import RotaryPosEmbeddingHelperV2 as FusedRoPE
+except ImportError:
+ print("Not using HPU fused kernel for apply_rotary_pos_emb")
+ FusedRoPE = None
+
+try:
+ from habana_frameworks.torch.hpex.normalization import FusedRMSNorm
+except ImportError:
+ print("Not using HPU fused kernel for RMSNorm")
+ FusedRMSNorm = None
+
+try:
+ from habana_frameworks.torch.hpex.kernels import FusedSDPA
+except ImportError:
+ print("Not using HPU fused scaled dot-product attention kernel.")
+ FusedSDPA = None
+
+
+deepspeed_available = is_deepspeed_available()
+
+logger = logging.get_logger(__name__)
+
+_CONFIG_FOR_DOC = "ArcticConfig"
+USE_DEEPSPEED_MOE_ARG = "use_deepspeed_moe_implementation"
+MOE_EXPERT_PARALLEL_SIZE_ARG = "moe_expert_parallel_size"
+DEEPSPEED_QUANTIZATION_CONFIG = "deepspeed_quantization"
+DEEPSPEED_LORA_CONFIG = "deepspeed_lora"
+QUANTIZATION_CONFIG = "ds_quantization_config"
+
+
+ def load_balancing_loss_func(
+     gate_logits: torch.Tensor, num_experts: torch.Tensor = None, top_k=4, attention_mask: Optional[torch.Tensor] = None
+ ) -> float:
+     r"""
+     Computes auxiliary load balancing loss as in Switch Transformer - implemented in Pytorch.
+
+     See Switch Transformer (https://arxiv.org/abs/2101.03961) for more details. This function implements the loss
+     function presented in equations (4) - (6) of the paper. It aims at penalizing cases where the routing between
+     experts is too unbalanced.
+
+     Args:
+         gate_logits (Union[`torch.Tensor`, Tuple[torch.Tensor]):
+             Logits from the `gate`, should be a tuple of model.config.num_hidden_layers tensors of
+             shape [batch_size X sequence_length, num_experts].
+         num_experts (`int`, *optional*):
+             Number of experts
+         top_k (`int`, *optional*, defaults to 4):
+             Number of experts selected per token. Also used as the expert-slot dimension of the padding mask.
+         attention_mask (`torch.Tensor`, None):
+             The attention_mask used in forward function
+             shape [batch_size X sequence_length] if not None.
+
+     Returns:
+         The auxiliary loss, or `0` when `gate_logits` is `None` or not a tuple.
+     """
+     if gate_logits is None or not isinstance(gate_logits, tuple):
+         return 0
+
+     # All per-layer logits are gathered on the device of the first layer before concatenation.
+     compute_device = gate_logits[0].device
+     concatenated_gate_logits = torch.cat([layer_gate.to(compute_device) for layer_gate in gate_logits], dim=0)
+
+     routing_weights = torch.nn.functional.softmax(concatenated_gate_logits, dim=-1)
+
+     _, selected_experts = torch.topk(routing_weights, top_k, dim=-1)
+
+     # expert_mask: [tokens, top_k, num_experts]
+     expert_mask = torch.nn.functional.one_hot(selected_experts, num_experts)
+
+     if attention_mask is None:
+         # Compute the percentage of tokens routed to each experts
+         tokens_per_expert = torch.mean(expert_mask.float(), dim=0)
+
+         # Compute the average probability of routing to these experts
+         router_prob_per_expert = torch.mean(routing_weights, dim=0)
+     else:
+         batch_size, sequence_length = attention_mask.shape
+         num_hidden_layers = concatenated_gate_logits.shape[0] // (batch_size * sequence_length)
+
+         # Compute the mask that masks all padding tokens as 0 with the same shape of expert_mask.
+         # The slot dimension must be `top_k` (it was hard-coded to 2, which cannot broadcast
+         # against expert_mask for any other top_k > 1).
+         expert_attention_mask = (
+             attention_mask[None, :, :, None, None]
+             .expand((num_hidden_layers, batch_size, sequence_length, top_k, num_experts))
+             .reshape(-1, top_k, num_experts)
+             .to(compute_device)
+         )
+
+         # Compute the percentage of tokens routed to each experts
+         tokens_per_expert = torch.sum(expert_mask.float() * expert_attention_mask, dim=0) / torch.sum(
+             expert_attention_mask, dim=0
+         )
+
+         # Compute the mask that masks all padding tokens as 0 with the same shape of tokens_per_expert
+         router_per_expert_attention_mask = (
+             attention_mask[None, :, :, None]
+             .expand((num_hidden_layers, batch_size, sequence_length, num_experts))
+             .reshape(-1, num_experts)
+             .to(compute_device)
+         )
+
+         # Compute the average probability of routing to these experts
+         router_prob_per_expert = torch.sum(routing_weights * router_per_expert_attention_mask, dim=0) / torch.sum(
+             router_per_expert_attention_mask, dim=0
+         )
+
+     overall_loss = torch.sum(tokens_per_expert * router_prob_per_expert.unsqueeze(0))
+     return overall_loss * num_experts
+
+
+# Copied from transformers.models.llama.modeling_llama._get_unpad_data
+ def _get_unpad_data(attention_mask):
+     """
+     Derive unpadding metadata from `attention_mask`.
+
+     Returns a tuple of: the flat indices of the non-zero mask entries, the int32 cumulative row sums of the
+     mask with a leading 0, and the largest row sum as a Python number.
+     """
+     tokens_per_row = attention_mask.sum(dim=-1, dtype=torch.int32)
+     flat_positions = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
+     longest_row = tokens_per_row.max().item()
+     running_total = torch.cumsum(tokens_per_row, dim=0, dtype=torch.int32)
+     cu_seqlens = F.pad(running_total, (1, 0))
+     return flat_positions, cu_seqlens, longest_row
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaRMSNorm with Llama->Arctic
+ class ArcticRMSNorm(nn.Module):
+     def __init__(self, hidden_size, eps=1e-6):
+         """
+         ArcticRMSNorm is equivalent to T5LayerNorm
+         """
+         super().__init__()
+         self.weight = nn.Parameter(torch.ones(hidden_size))
+         self.variance_epsilon = eps
+
+     def forward(self, hidden_states):
+         """
+         Copied from optimum/habana/transformers/models/llama/modeling_llama.py gaudi_llama_rmsnorm_forward
+
+         Uses `FusedRMSNorm` when the input lives on an HPU device and the kernel was imported; otherwise
+         normalizes in float32 and casts back to the input dtype before scaling by `weight`.
+         """
+         fused_available = hidden_states.device.type == "hpu" and FusedRMSNorm is not None
+         if not fused_available:
+             input_dtype = hidden_states.dtype
+             upcast = hidden_states.to(torch.float32)
+             mean_square = upcast.pow(2).mean(-1, keepdim=True)
+             normalized = upcast * torch.rsqrt(mean_square + self.variance_epsilon)
+             return self.weight * normalized.to(input_dtype)
+
+         if hidden_states.dtype == self.weight.dtype:
+             return FusedRMSNorm.apply(hidden_states, self.weight, self.variance_epsilon)
+
+         # mixed dtypes are not good for FusedRMSNorm, both inputs need to have same dtype
+         orig_dtype = hidden_states.dtype
+         fused_out = FusedRMSNorm.apply(hidden_states.to(self.weight.dtype), self.weight, self.variance_epsilon)
+         return fused_out.to(orig_dtype)
+
+
+# Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->Arctic
+ class ArcticRotaryEmbedding(nn.Module):
+     """
+     Rotary position embedding with cached cos/sin tables.
+
+     The tables are built for `max_position_embeddings` positions at construction time and rebuilt
+     whenever `forward` is asked for a longer sequence.
+     """
+
+     def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
+         super().__init__()
+
+         self.dim = dim
+         self.max_position_embeddings = max_position_embeddings
+         self.base = base
+         inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
+         self.register_buffer("inv_freq", inv_freq, persistent=False)
+
+         # Build here to make `torch.jit.trace` work.
+         self._set_cos_sin_cache(
+             seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.get_default_dtype()
+         )
+
+     def _set_cos_sin_cache(self, seq_len, device, dtype):
+         """(Re)build the non-persistent `_cos_cached` / `_sin_cached` buffers for `seq_len` positions."""
+         self.max_seq_len_cached = seq_len
+         t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
+
+         freqs = torch.outer(t, self.inv_freq)
+         # Different from paper, but it uses a different permutation in order to obtain the same calculation
+         emb = torch.cat((freqs, freqs), dim=-1)
+         self.register_buffer("_cos_cached", emb.cos().to(dtype), persistent=False)
+         self.register_buffer("_sin_cached", emb.sin().to(dtype), persistent=False)
+
+     def forward(self, x, seq_len=None):
+         """
+         Return the `(cos, sin)` tables for the first `seq_len` positions, cast to `x.dtype`.
+
+         When `seq_len` is omitted it is taken from `x.shape[-2]`; previously the default `None`
+         reached the `>` comparison below and raised `TypeError`.
+         """
+         # x: [bs, num_attention_heads, seq_len, head_size]
+         if seq_len is None:
+             seq_len = x.shape[-2]
+         if seq_len > self.max_seq_len_cached:
+             self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
+
+         return (
+             self._cos_cached[:seq_len].to(dtype=x.dtype),
+             self._sin_cached[:seq_len].to(dtype=x.dtype),
+         )
+
+
+# Copied from transformers.models.llama.modeling_llama.rotate_half
+ def rotate_half(x):
+     """Split the last dim in two halves `(a, b)` and return `(-b, a)` concatenated on that dim."""
+     midpoint = x.shape[-1] // 2
+     front, back = x[..., :midpoint], x[..., midpoint:]
+     return torch.cat((-back, front), dim=-1)
+
+
+# Copied from transformers.models.llama.modeling_llama.apply_rotary_pos_emb
+ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
+     """Rotate the query and key tensors with the rotary position embedding.
+
+     Args:
+         q (`torch.Tensor`): The query tensor.
+         k (`torch.Tensor`): The key tensor.
+         cos (`torch.Tensor`): The cosine part of the rotary embedding.
+         sin (`torch.Tensor`): The sine part of the rotary embedding.
+         position_ids (`torch.Tensor`):
+             Indices used to gather rows from `cos` and `sin`, e.g. offset position ids when working with a
+             KV-cache.
+         unsqueeze_dim (`int`, *optional*, defaults to 1):
+             Dimension inserted into `cos[position_ids]` / `sin[position_ids]` so they broadcast against `q` and
+             `k`. Use 1 when `q`/`k` are laid out as [batch_size, heads, seq_len, head_dim] and 2 when they are
+             laid out as [batch_size, seq_len, heads, head_dim].
+     Returns:
+         `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
+     """
+     cos_at_pos = cos[position_ids].unsqueeze(unsqueeze_dim)
+     sin_at_pos = sin[position_ids].unsqueeze(unsqueeze_dim)
+
+     def _rotate(states):
+         return (states * cos_at_pos) + (rotate_half(states) * sin_at_pos)
+
+     return _rotate(q), _rotate(k)
+
+
+# Copied from optimum/habana/transformers/models/llama/modeling_llama.py gaudi_llama_repeat_kv()
+ def repeat_kv(
+     query_states: torch.Tensor,
+     key_states: torch.Tensor,
+     value_states: torch.Tensor,
+     attention_mask: torch.Tensor,
+     n_rep: int,
+ ):
+     """
+     Reshape query/key/value (and the mask) into a grouped layout so key/value heads broadcast over their
+     query group during matmul instead of being physically repeated.
+
+     - When `n_rep == 1` or there is a single key/value head, every input is returned unchanged.
+     - Otherwise the query states go from (batch, num_heads, seqlen, head_dim) to
+       (batch, num_key_value_heads, n_rep, seqlen, head_dim), the key/value states go from
+       (batch, num_key_value_heads, seqlen, head_dim) to (batch, num_key_value_heads, 1, seqlen, head_dim),
+       and a non-None attention mask gets an extra dim inserted at position 1.
+     """
+     kv_batch, num_key_value_heads, kv_len, kv_head_dim = key_states.shape
+     if n_rep == 1 or num_key_value_heads == 1:
+         return query_states, key_states, value_states, attention_mask
+
+     grouped_kv_shape = (kv_batch, num_key_value_heads, 1, kv_len, kv_head_dim)
+     key_states = key_states.reshape(grouped_kv_shape)
+     value_states = value_states.reshape(grouped_kv_shape)
+
+     q_batch, _, q_len, q_head_dim = query_states.shape
+     query_states = query_states.reshape((q_batch, num_key_value_heads, n_rep, q_len, q_head_dim))
+
+     if attention_mask is not None:
+         # Add groups dim and set to 1
+         attention_mask = attention_mask.unsqueeze(1)
+
+     return query_states, key_states, value_states, attention_mask
+
+
+# Copied from transformers.models.mistral.modeling_mistral.MistralAttention with Mistral->Arctic
+ class ArcticAttention(nn.Module):
+     """
+     Multi-headed attention from the 'Attention Is All You Need' paper with rotary position embeddings,
+     grouped key/value heads and a per-layer `KVCache` pair. The HPU `FusedSDPA` kernel is used when it
+     could be imported; otherwise an eager matmul/softmax fallback is used.
+     """
+
+     def __init__(self, config: ArcticConfig, layer_idx: Optional[int] = None, **kwargs):
+         super().__init__()
+         # Guarantee that `config.rope_scaling` exists (defaulting to None).
+         config.rope_scaling = getattr(config, "rope_scaling", None)
+         self.config = config
+         self.layer_idx = layer_idx
+         if layer_idx is None:
+             logger.warning_once(
+                 f"Instantiating {self.__class__.__name__} without passing `layer_idx` is not recommended and will "
+                 "lead to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` "
+                 "when creating this class."
+             )
+
+         self.rotary_emb = GaudiLlamaRotaryEmbedding(config=self.config)
+         self.k_cache = KVCache()
+         self.v_cache = KVCache()
+         self.inp_seq_len = -1
+         # Passed to GaudiMixtralAttentionLongSequence.forward on the long-sequence path.
+         self.block_size = 1024
+
+         self.hidden_size = config.hidden_size
+         self.num_heads = config.num_attention_heads
+         self.head_dim = self.hidden_size // self.num_heads
+         self.num_key_value_heads = config.num_key_value_heads
+         self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+         self.max_position_embeddings = config.max_position_embeddings
+         self.rope_theta = config.rope_theta
+         self.is_causal = True
+         self.attention_dropout = config.attention_dropout
+         if (self.head_dim * self.num_heads) != self.hidden_size:
+             raise ValueError(
+                 f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
+                 f" and `num_heads`: {self.num_heads})."
+             )
+
+         self.q_proj = nn.Linear(
+             self.hidden_size,
+             self.num_heads * self.head_dim,
+             bias=False,
+         )
+         self.k_proj = nn.Linear(
+             self.hidden_size,
+             self.num_key_value_heads * self.head_dim,
+             bias=False,
+         )
+         self.v_proj = nn.Linear(
+             self.hidden_size,
+             self.num_key_value_heads * self.head_dim,
+             bias=False,
+         )
+         self.o_proj = nn.Linear(
+             self.hidden_size,
+             self.hidden_size,
+             bias=False,
+         )
+
+     def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+         """View `tensor` as (bsz, seq_len, num_heads, head_dim) and move the head dim in front of seq_len."""
+         return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
+
+     def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len):
+         """
+         Allocate KV cache. Copied from ../mixtral/modeling_mixtral.py GaudiMixtralAttention.allocate_kv_cache
+         """
+         cache_shape = (batch_size, self.num_key_value_heads, max_seq_len, self.head_dim)
+         device = self.k_proj.weight.device
+         dtype = self.config.torch_dtype
+         self.k_cache.allocate(inp_seq_len, dtype, device, cache_shape)
+         self.v_cache.allocate(inp_seq_len, dtype, device, cache_shape)
+
+     def forward(
+         self,
+         hidden_states: torch.Tensor,
+         attention_mask: Optional[torch.Tensor] = None,
+         position_ids: Optional[torch.LongTensor] = None,
+         past_key_value: Optional[Cache] = None,
+         output_attentions: bool = False,
+         use_cache: bool = False,
+         cache_position: Optional[torch.LongTensor] = None,
+         token_idx: Optional[torch.Tensor] = None,
+         reuse_cache: Optional[bool] = False,
+         flash_attention_recompute: Optional[bool] = False,
+         cache_idx: Optional[int] = None,
+         **kwargs,
+     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+         """
+         Adapted from ArcticAttention.forward: https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/be318cae5aba5291208f27d30991a5150500887d
+
+         Reference Gaudi implementation from ../mixtral/modeling_mixtral.py GaudiMixtralAttention
+
+         Changes made:
+         - Added new args
+             - token_idx
+             - reuse_cache
+             - flash_attention_recompute
+             - cache_idx
+         - Optimize KV cache
+         - Use FusedSDPA attention
+
+         Returns:
+             `(attn_output, attn_weights, past_key_value)`. `attn_weights` is `None` unless `output_attentions`
+             is set and the eager fallback was used; `past_key_value` is `None` when `use_cache` is False.
+         """
+         if "padding_mask" in kwargs:
+             warnings.warn(
+                 "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
+             )
+         bsz, q_len, _ = hidden_states.size()
+
+         query_states = self.q_proj(hidden_states)
+         key_states = self.k_proj(hidden_states)
+         value_states = self.v_proj(hidden_states)
+
+         query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
+         key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+         value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+
+         kv_seq_len = key_states.shape[-2]
+         if past_key_value is not None:
+             if self.layer_idx is None:
+                 raise ValueError(
+                     f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} "
+                     "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class "
+                     "with a layer index."
+                 )
+             if token_idx is None:
+                 if hasattr(past_key_value, "get_usable_length"):
+                     kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+                 else:
+                     kv_seq_len += past_key_value[0].shape[-2]
+             else:
+                 if reuse_cache:
+                     # With reuse_cache, past_key_value holds cache *shapes* (see below), not tensors.
+                     kv_seq_len = past_key_value[0][-2]
+                 else:
+                     kv_seq_len = past_key_value[0].shape[-2]
+         cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+         query_states, key_states = apply_customized_rope(
+             query_states, key_states, cos, sin, position_ids, self.training
+         )
+
+         if use_cache:
+             if reuse_cache:
+                 key_states = self.k_cache(key_states, 2, token_idx)
+                 value_states = self.v_cache(value_states, 2, token_idx)
+                 past_key_value = (self.k_cache.get_shape(), self.v_cache.get_shape())
+             else:
+                 if past_key_value is None:
+                     past_key = torch.zeros(key_states.shape, dtype=self.k_proj.weight.dtype, device=key_states.device)
+                     past_value = torch.zeros(
+                         key_states.shape, dtype=self.k_proj.weight.dtype, device=key_states.device
+                     )
+                     past_key_value = (past_key, past_value)
+                 key_states = self.k_cache.update(past_key_value[0], key_states, 2, token_idx, self.inp_seq_len)
+                 value_states = self.v_cache.update(past_key_value[1], value_states, 2, token_idx, self.inp_seq_len)
+                 if token_idx is None:
+                     past_key_value = (key_states, value_states)
+
+             if cache_idx is not None and q_len == 1:
+                 key_states = key_states[:, :, :cache_idx, :]
+                 value_states = value_states[:, :, :cache_idx, :]
+                 if attention_mask is not None:
+                     attention_mask = attention_mask[:, :, :, :cache_idx]
+                 kv_seq_len = key_states.shape[-2]
+         else:
+             past_key_value = None
+
+         if FusedSDPA is not None:
+             if query_states.dtype != key_states.dtype:
+                 key_states = key_states.type(query_states.dtype)
+                 value_states = value_states.type(query_states.dtype)
+             # support long sequences exceeding 8192
+             if not self.training and q_len == key_states.size(-2) and q_len > 8192:
+                 htcore.mark_step()
+                 attn_output = GaudiMixtralAttentionLongSequence.forward(
+                     query_states,
+                     key_states,
+                     value_states,
+                     attention_mask,
+                     False,
+                     self.block_size,
+                 )
+                 htcore.mark_step()
+             else:
+                 attn_output = FusedSDPA.apply(
+                     query_states,
+                     key_states,
+                     value_states,
+                     attention_mask,
+                     0.0,
+                     False,
+                     None,
+                     "None",
+                     flash_attention_recompute,
+                 )
+         else:
+             # Validate the mask while it is still 4-D: repeat_kv may insert a group dim into it.
+             if attention_mask is not None:
+                 if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+                     raise ValueError(
+                         f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+                     )
+
+             # Group query heads per k/v head if n_kv_heads < n_heads. The tensors may come back 5-D
+             # (batch, kv_heads, n_rep | 1, seq, head_dim), so only the last two dims are addressed below.
+             query_states, key_states, value_states, attention_mask = repeat_kv(
+                 query_states, key_states, value_states, attention_mask, self.num_key_value_groups
+             )
+
+             attn_weights = torch.matmul(query_states, key_states.transpose(-2, -1)) / math.sqrt(self.head_dim)
+
+             if attention_mask is not None:
+                 attn_weights = attn_weights + attention_mask
+
+             # upcast attention to fp32
+             attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
+             attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training)
+             attn_output = torch.matmul(attn_weights, value_states)
+
+             # Fold the grouped layout back to one dim per attention head.
+             attn_weights = attn_weights.reshape(bsz, self.num_heads, q_len, -1)
+             attn_output = attn_output.reshape(bsz, self.num_heads, q_len, -1)
+
+             if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
+                 raise ValueError(
+                     f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
+                     f" {attn_weights.size()}"
+                 )
+
+             if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
+                 raise ValueError(
+                     f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
+                     f" {attn_output.size()}"
+                 )
+
+         attn_output = attn_output.transpose(1, 2).contiguous()
+         attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
+
+         attn_output = self.o_proj(attn_output)
+
+         # The fused kernels do not expose attention probabilities.
+         if not output_attentions or FusedSDPA:
+             attn_weights = None
+
+         return attn_output, attn_weights, past_key_value
+
+
+ class ArcticMLP(nn.Module):
+     def __init__(
+         self,
+         config: ArcticConfig,
+         is_residual_mlp=False,
+     ):
+         """Gated MLP for Arctic built from three bias-free linear layers.
+
+         config: supplies `hidden_size`, `intermediate_size` and `hidden_act`.
+         is_residual_mlp: bool. If true, this is the MLP inside the arctic residual layer and its inner
+             dimension equals `hidden_size`; otherwise the inner dimension is `intermediate_size`.
+         """
+         super().__init__()
+         self.hidden_dim = config.hidden_size
+         self.ffn_dim = self.hidden_dim if is_residual_mlp else config.intermediate_size
+         self.w1 = nn.Linear(self.hidden_dim, self.ffn_dim, bias=False)
+         self.w2 = nn.Linear(self.ffn_dim, self.hidden_dim, bias=False)
+         self.w3 = nn.Linear(self.hidden_dim, self.ffn_dim, bias=False)
+         self.act_fn = ACT2FN[config.hidden_act]
+
+     def forward(self, hidden_states):
+         """Compute `w2(act_fn(w1(x)) * w3(x))`."""
+         gated = self.act_fn(self.w1(hidden_states))
+         projected = self.w3(hidden_states)
+         return self.w2(gated * projected)
+
+
+ class ArcticMoE(nn.Module):
+     """
+     Feed-forward block of an Arctic decoder layer.
+
+     Every `config.moe_layer_frequency`-th layer (1-based) holds a router (`gate`) plus
+     `config.num_local_experts` `ArcticMLP` experts; every other layer holds a single dense `ArcticMLP`.
+     `forward` always returns `(hidden_states, aux_loss)`.
+     """
+
+     def __init__(self, config: ArcticConfig, layer_id: int, **kwargs):
+         super().__init__()
+
+         self.hidden_dim = config.hidden_size
+         self.num_experts = config.num_local_experts
+         self.layer_id = layer_id
+         self.top_k = config.num_experts_per_tok
+         self.is_moe_layer = (layer_id + 1) % config.moe_layer_frequency == 0
+
+         if not self.is_moe_layer:  # dense, not MoE
+             self.mlp = ArcticMLP(config)
+         else:
+             # "local" MoE implementation
+             self.gate = nn.Linear(self.hidden_dim, self.num_experts, bias=False)
+             self.experts = nn.ModuleList([ArcticMLP(config) for i in range(self.num_experts)])
+
+     # Similar in behavior to transformers.models.mixtral.modeling_mixtral.MixtralSparseMoeBlock.forward but more efficient.
+     def _moe_foreward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+         """
+         Run every expert on every token and combine the outputs with per-token routing weights.
+
+         Tokens get a zero weight for experts outside their top-k, so no token gathering is needed.
+         Returns the mixed hidden states of shape (batch, sequence_length, hidden_dim) together with
+         the load-balancing loss computed from this layer's router logits.
+         """
+         batch_size, sequence_length, hidden_dim = hidden_states.shape
+         hidden_states = hidden_states.view(-1, hidden_dim)
+         # router_logits: (batch * sequence_length, n_experts)
+         router_logits = self.gate(hidden_states)
+
+         routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float)
+         routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1)
+         routing_weights /= routing_weights.sum(dim=-1, keepdim=True)
+         # we cast back to the input dtype
+         routing_weights = routing_weights.to(hidden_states.dtype)
+
+         final_hidden_states = torch.zeros(
+             (batch_size, sequence_length, hidden_dim), dtype=hidden_states.dtype, device=hidden_states.device
+         )
+
+         # Dense (tokens, n_experts) weight table: routing weight for selected experts, 0 elsewhere.
+         padded_weights = torch.zeros(
+             (batch_size * sequence_length, self.num_experts), dtype=hidden_states.dtype, device=hidden_states.device
+         )
+         padded_weights.scatter_(-1, selected_experts, routing_weights)
+         padded_weights = padded_weights.reshape(-1, sequence_length, self.num_experts)
+         # -> (n_experts, batch, sequence_length, 1) so that indexing by expert yields a broadcastable weight
+         padded_weights = padded_weights.permute(2, 0, 1).unsqueeze(-1)
+
+         # Loop over all available experts in the model and perform the computation on each expert.
+         # `hidden_states` is already flattened to (tokens, hidden_dim), so it is fed to the experts directly.
+         for expert_idx in range(self.num_experts):
+             expert_layer = self.experts[expert_idx]
+             padded_weight = padded_weights[expert_idx]
+             expert_output = expert_layer(hidden_states).reshape(-1, sequence_length, hidden_dim)
+             final_hidden_states += expert_output * padded_weight
+             # support long sequences exceeding 8192
+             if not self.training and sequence_length > 8192:
+                 htcore.mark_step()
+
+         # Directly output the loss to align with the DeepSpeed implementation.
+         aux_loss = load_balancing_loss_func((router_logits,), self.num_experts, self.top_k)
+         return final_hidden_states, aux_loss
+
+     def forward(self, hidden_states: torch.Tensor):
+         """Return `(hidden_states, aux_loss)`; dense layers report a zero loss tensor."""
+         if self.is_moe_layer:
+             return self._moe_foreward(hidden_states)
+         else:
+             return self.mlp(hidden_states), torch.tensor(0.0, device=hidden_states.device, dtype=hidden_states.dtype)
+
+
+ class ArcticDecoderLayer(nn.Module):
+     """
+     One Arctic decoder layer: self-attention followed by the `ArcticMoE` feed-forward block, with an
+     additional residual MLP branch on MoE layers when `config.parallel_attn_mlp_res` is set.
+     """
+
+     def __init__(self, config: ArcticConfig, layer_idx: int, **kwargs):
+         super().__init__()
+         self.layer_idx = layer_idx
+         self.hidden_size = config.hidden_size
+         self.self_attn = ArcticAttention(config, layer_idx, **kwargs)
+         self.block_sparse_moe = ArcticMoE(config, layer_id=layer_idx, **kwargs)
+         self.input_layernorm = ArcticRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+         self.post_attention_layernorm = ArcticRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+         # add residual only when it is moe layer
+         self.parallel_attn_mlp_res = config.parallel_attn_mlp_res and self.block_sparse_moe.is_moe_layer
+         if self.parallel_attn_mlp_res:
+             self.residual_layernorm = ArcticRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+             # MLP of the residual branch; its inner dimension equals hidden_size.
+             self.residual_mlp = ArcticMLP(config, is_residual_mlp=True)
+
+     def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len):
+         """Forward the KV-cache allocation request to the attention module."""
+         self.self_attn.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len)
+
+     def forward(
+         self,
+         hidden_states: torch.Tensor,
+         attention_mask: Optional[torch.Tensor] = None,
+         position_ids: Optional[torch.LongTensor] = None,
+         past_key_value: Optional[Tuple[torch.Tensor]] = None,
+         output_attentions: Optional[bool] = False,
+         use_cache: Optional[bool] = False,
+         cache_position: Optional[torch.LongTensor] = None,
+         token_idx: Optional[torch.Tensor] = None,
+         reuse_cache: Optional[bool] = False,
+         flash_attention_recompute: Optional[bool] = False,
+         cache_idx: Optional[int] = None,
+         **kwargs,
+     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+         """
+         Modified from original Arctic forward
+         Changes:
+         - Add new arg cache_position
+         - Add new arg token_idx
+         - Add new arg reuse_cache
+         - Add new arg flash_attention_recompute
+         - Add new arg cache_idx
+
+         Args:
+             hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
+             attention_mask (`torch.FloatTensor`, *optional*): attention mask, passed through to the attention
+                 module.
+             past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
+             output_attentions (`bool`, *optional*):
+                 Whether or not to return the attentions tensors of all attention layers. See `attentions` under
+                 returned tensors for more detail.
+             use_cache (`bool`, *optional*):
+                 If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
+                 (see `past_key_values`).
+
+         Returns:
+             A tuple `(hidden_states, [attn_weights], [present_key_value], gate_loss)`; the bracketed entries are
+             present only when `output_attentions` / `use_cache` are set.
+         """
+
+         if "padding_mask" in kwargs:
+             warnings.warn(
+                 "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
+             )
+
+         layer_input = hidden_states
+
+         # Self Attention
+         attn_output, self_attn_weights, present_key_value = self.self_attn(
+             hidden_states=self.input_layernorm(layer_input),
+             attention_mask=attention_mask,
+             position_ids=position_ids,
+             past_key_value=past_key_value,
+             output_attentions=output_attentions,
+             use_cache=use_cache,
+             cache_position=cache_position,
+             token_idx=token_idx,
+             reuse_cache=reuse_cache,
+             flash_attention_recompute=flash_attention_recompute,
+             cache_idx=cache_idx,
+         )
+         post_attn = layer_input + attn_output
+
+         if self.parallel_attn_mlp_res:
+             # Note the architecture here is that the MOE layers reads the **pre-attention** input while there is a
+             # "normal" transformer residual part. This is to achieve better parallelization.
+
+             # residual mlp part
+             residual_branch = post_attn + self.residual_mlp(self.residual_layernorm(post_attn))
+             # parallel mlp moe part: same input as the attention block
+             moe_output, gate_loss = self.block_sparse_moe(self.post_attention_layernorm(layer_input))
+             hidden_states = residual_branch + moe_output
+         else:
+             moe_output, gate_loss = self.block_sparse_moe(self.post_attention_layernorm(post_attn))
+             hidden_states = post_attn + moe_output
+
+         outputs = (hidden_states,)
+         if output_attentions:
+             outputs += (self_attn_weights,)
+         if use_cache:
+             outputs += (present_key_value,)
+
+         return outputs + (gate_loss,)
+
+
+ARCTIC_START_DOCSTRING = r"""
+ This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
+ library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
+ etc.)
+
+ This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
+ Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage
+ and behavior.
+
+ Parameters:
+ config ([`ArcticConfig`]):
+ Model configuration class with all the parameters of the model. Initializing with a config file does not
+ load the weights associated with the model, only the configuration. Check out the
+ [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+"""
+
+
+ @add_start_docstrings(
+     "The bare Arctic Model outputting raw hidden-states without any specific head on top.",
+     ARCTIC_START_DOCSTRING,
+ )
+ # Copied from transformers.models.mistral.modeling_mistral.MistralPreTrainedModel with Mistral->Arctic
+ class ArcticPreTrainedModel(PreTrainedModel):
+     # Class-level switches consumed by the `PreTrainedModel` machinery.
+     config_class = ArcticConfig
+     base_model_prefix = "model"
+     supports_gradient_checkpointing = True
+     _no_split_modules = ["ArcticDecoderLayer"]
+     _skip_keys_device_placement = "past_key_values"
+     _supports_flash_attn_2 = True
+     _supports_sdpa = True
+     _supports_cache_class = True
+
+     def _init_weights(self, module):
+         """
+         Initialize `nn.Linear` and `nn.Embedding` weights from N(0, `config.initializer_range`), zeroing
+         linear biases and the embedding row at `padding_idx` when present. Other modules are left untouched.
+         """
+         std = self.config.initializer_range
+
+         if not isinstance(module, (nn.Linear, nn.Embedding)):
+             return
+
+         module.weight.data.normal_(mean=0.0, std=std)
+         if isinstance(module, nn.Linear):
+             if module.bias is not None:
+                 module.bias.data.zero_()
+         elif module.padding_idx is not None:
+             module.weight.data[module.padding_idx].zero_()
+
+
+MIXTRAL_INPUTS_DOCSTRING = r"""
+ Args:
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
+ it.
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ [What are input IDs?](../glossary#input-ids)
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+ - 1 for tokens that are **not masked**,
+ - 0 for tokens that are **masked**.
+
+ [What are attention masks?](../glossary#attention-mask)
+
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+ [`PreTrainedTokenizer.__call__`] for details.
+
+ If `past_key_values` is used, optionally only the last `input_ids` have to be input (see
+ `past_key_values`).
+
+ If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
+ and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
+ information on the default strategy.
+
+ - 1 indicates the head is **not masked**,
+ - 0 indicates the head is **masked**.
+ position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+ config.n_positions - 1]`.
+
+ [What are position IDs?](../glossary#position-ids)
+ past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+ Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
+ `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of shape
+ `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
+
+ Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
+ blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
+
+ If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that
+ don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
+ `input_ids` of shape `(batch_size, sequence_length)`.
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
+ is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
+ model's internal embedding lookup matrix.
+ use_cache (`bool`, *optional*):
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
+ `past_key_values`).
+ output_attentions (`bool`, *optional*):
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
+ tensors for more detail.
+ output_hidden_states (`bool`, *optional*):
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
+ more detail.
+ return_dict (`bool`, *optional*):
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
+"""
+
+
+@add_start_docstrings(
+ "The bare Arctic Model outputting raw hidden-states without any specific head on top.",
+ ARCTIC_START_DOCSTRING,
+)
+# Copied from transformers.models.mistral.modeling_mistral.MistralModel with MISTRAL->MIXTRAL,Mistral->Arctic
+class ArcticModel(ArcticPreTrainedModel):
+ """
+ Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`ArcticDecoderLayer`]
+
+ Args:
+ config: ArcticConfig
+ """
+
+ def __init__(self, config: ArcticConfig, **kwargs):
+ super().__init__(config)
+ self.padding_idx = config.pad_token_id
+ self.vocab_size = config.vocab_size
+
+ self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
+ self.layers = nn.ModuleList(
+ [ArcticDecoderLayer(config, layer_idx, **kwargs) for layer_idx in range(config.num_hidden_layers)]
+ )
+ self._attn_implementation = config._attn_implementation
+ self.norm = ArcticRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+
+ self.gradient_checkpointing = True
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len):
+ for layer in self.layers:
+ layer.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len)
+
+ def get_input_embeddings(self):
+ return self.embed_tokens
+
+ def set_input_embeddings(self, value):
+ self.embed_tokens = value
+
+ # Ignore copy
+ @add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING)
+ def forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ cache_position: Optional[torch.LongTensor] = None,
+ token_idx: Optional[torch.Tensor] = None,
+ reuse_cache: Optional[bool] = False,
+ flash_attention_recompute: Optional[bool] = False,
+ cache_idx: int = None,
+ ) -> Union[Tuple, MoeModelOutputWithPast]:
+ """
+ Modified from original Arctic forward
+ Changes:
+ - Add new arg cache_position
+ - Add new arg token_idx
+ - Add new arg reuse_cache
+ - Add new arg flash_attention_recompute
+ - Add new arg cache_idx
+ - Force legacy KV cache
+ """
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ # retrieve input_ids and inputs_embeds
+ if input_ids is not None and inputs_embeds is not None:
+ raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
+ elif input_ids is not None:
+ batch_size, seq_length = input_ids.shape
+ elif inputs_embeds is not None:
+ batch_size, seq_length, _ = inputs_embeds.shape
+ else:
+ raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
+
+ past_key_values_length = 0
+
+ if self.gradient_checkpointing and self.training:
+ if use_cache:
+ logger.warning_once(
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+ )
+ use_cache = False
+
+ # NOTE: Forcing legacy cache for HPU
+ if past_key_values is not None and use_cache:
+ if reuse_cache:
+ past_key_values_length = past_key_values[0][0][2]
+ else:
+ past_key_values_length = past_key_values[0][0].shape[2]
+
+ if position_ids is None:
+ device = input_ids.device if input_ids is not None else inputs_embeds.device
+ position_ids = torch.arange(
+ past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
+ )
+ position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
+ else:
+ position_ids = position_ids.view(-1, seq_length).long()
+
+ if inputs_embeds is None:
+ inputs_embeds = self.embed_tokens(input_ids)
+
+ if cache_position is None:
+ past_seen_tokens = 0
+ if past_key_values is not None:
+ if isinstance(past_key_values, Cache):
+ past_seen_tokens = past_key_values.get_seq_length()
+ else:
+ past_seen_tokens = past_key_values[0][0].shape[2]
+
+ cache_position = torch.arange(
+ past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
+ )
+
+ if position_ids is None:
+ position_ids = cache_position.unsqueeze(0)
+
+ if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
+ is_padding_right = attention_mask[:, -1].sum().item() != batch_size
+ if is_padding_right:
+ raise ValueError(
+ "You are attempting to perform batched generation with padding_side='right'"
+ " this may lead to unexpected behaviour for Flash Attention version of Arctic. Make sure to "
+ " call `tokenizer.padding_side = 'left'` before tokenizing the input. "
+ )
+
+ if self._attn_implementation == "flash_attention_2":
+ # 2d mask is passed through the layers
+ attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
+ elif self._attn_implementation == "sdpa" and not output_attentions:
+ # output_attentions=True can not be supported when using SDPA, and we fall back on
+ # the manual implementation that requires a 4D causal mask in all cases.
+ attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
+ attention_mask,
+ (batch_size, seq_length),
+ inputs_embeds,
+ past_key_values_length,
+ )
+ else:
+ # 4d mask is passed through the layers
+ attention_mask = _prepare_4d_causal_attention_mask(
+ attention_mask,
+ (batch_size, seq_length),
+ inputs_embeds,
+ past_key_values_length,
+ sliding_window=self.config.sliding_window,
+ )
+
+ hidden_states = inputs_embeds
+
+ # decoder layers
+ all_hidden_states = () if output_hidden_states else None
+ all_self_attns = () if output_attentions else None
+ all_router_losses = ()
+ next_decoder_cache = () if use_cache else None
+
+ for i, decoder_layer in enumerate(self.layers):
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ if self.gradient_checkpointing and self.training:
+ layer_outputs = self._gradient_checkpointing_func(
+ decoder_layer.__call__,
+ hidden_states,
+ attention_mask,
+ position_ids,
+ past_key_values,
+ output_attentions,
+ use_cache,
+ cache_position,
+ )
+ else:
+ layer_outputs = decoder_layer(
+ hidden_states,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_value=None if past_key_values is None else past_key_values[i],
+ output_attentions=output_attentions,
+ use_cache=use_cache,
+ cache_position=cache_position,
+ token_idx=token_idx,
+ reuse_cache=reuse_cache,
+ flash_attention_recompute=flash_attention_recompute,
+ cache_idx=cache_idx,
+ )
+
+ hidden_states = layer_outputs[0]
+
+ if use_cache:
+ next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
+
+ if output_attentions:
+ all_self_attns += (layer_outputs[1],)
+
+ all_router_losses += (layer_outputs[-1],)
+ htcore.mark_step()
+ hidden_states = self.norm(hidden_states)
+
+ # add hidden states from the last decoder layer
+ if output_hidden_states:
+ all_hidden_states += (hidden_states,)
+
+ next_cache = None
+ if use_cache:
+ next_cache = (
+ next_decoder_cache.to_legacy_cache() if isinstance(next_decoder_cache, Cache) else next_decoder_cache
+ )
+
+ if not return_dict:
+ return tuple(
+ v
+ for v in [hidden_states, next_cache, all_hidden_states, all_self_attns, all_router_losses]
+ if v is not None
+ )
+ return MoeModelOutputWithPast(
+ last_hidden_state=hidden_states,
+ past_key_values=next_cache,
+ hidden_states=all_hidden_states,
+ attentions=all_self_attns,
+ router_logits=all_router_losses,
+ )
+
+
+class ArcticForCausalLM(ArcticPreTrainedModel, GenerationMixin):
+ # TODO(jeffra): update _keys_to_ignore_on_load_unexpected with expert keys not relevant for this rank
+ _keys_to_ignore_on_load_unexpected = [
+ r"model\.layers\.\d+\.block_sparse_moe\.experts\.\d+\.w\d+\.weight",  # one regex per entry: the comma matters
+ r"model\.layers\.\d+\.block_sparse_moe\.gate\.weight",
+ ]
+ _keys_to_ignore_on_load_missing = [
+ r"model\.layers\.\d+\.block_sparse_moe\.mlp\.deepspeed_moe\.experts\.deepspeed_experts\.\d+\.w\d+\.weight",
+ r"model\.layers\.\d+\.block_sparse_moe\.mlp\.deepspeed_moe\.gate\.wg\.weight",
+ ]
+ _tied_weights_keys = [] # ["lm_head.weight"]
+
+ def __init__(self, config: ArcticConfig, **kwargs):
+ super().__init__(config)
+ self.model = ArcticModel(config, **kwargs)
+ self.vocab_size = config.vocab_size
+ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+ self.router_aux_loss_coef = config.router_aux_loss_coef
+ self.num_experts = config.num_local_experts
+ self.num_experts_per_tok = config.num_experts_per_tok
+ self.use_deepspeed_moe = kwargs.get(USE_DEEPSPEED_MOE_ARG, False)
+ self.moe_expert_parallel_size = kwargs.get(MOE_EXPERT_PARALLEL_SIZE_ARG, 1)
+ self.is_deepspeed_lora = kwargs.get(DEEPSPEED_LORA_CONFIG) is not None
+ self.gradient_checkpointing = True
+ # self.shard_base_weights_if_doing_lora = kwargs.get("shard_base_weights_if_doing_lora", False)
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len):
+ self.model.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len)
+ self.kv_cache_len = max_seq_len
+
+ def get_input_embeddings(self):
+ return self.model.embed_tokens
+
+ def set_input_embeddings(self, value):
+ self.model.embed_tokens = value
+
+ def get_output_embeddings(self):
+ return self.lm_head
+
+ def set_output_embeddings(self, new_embeddings):
+ self.lm_head = new_embeddings
+
+ def set_decoder(self, decoder):
+ self.model = decoder
+
+ def get_decoder(self):
+ return self.model
+
+ @add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING)
+ @replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
+ # Ignore copy
+ def forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ labels: Optional[torch.LongTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ cache_position: Optional[torch.LongTensor] = None,
+ token_idx: Optional[torch.Tensor] = None,
+ reuse_cache: Optional[bool] = None,
+ flash_attention_recompute: Optional[bool] = False,
+ cache_idx: int = None,
+ ) -> Union[Tuple, MoeCausalLMOutputWithPast]:
+ r"""
+ Modified from original. Only differences are:
+ - Add new arg cache_position
+ - Add new arg token_idx
+ - Add new arg reuse_cache
+ - Add new arg flash_attention_recompute
+ - Add new arg cache_idx
+
+ Args:
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+ Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+ config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
+ (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+
+ Returns:
+
+ Example:
+
+ ```python
+ >>> from transformers import AutoTokenizer, ArcticForCausalLM
+
+ >>> model = ArcticForCausalLM.from_pretrained(PATH_TO_CONVERTED_WEIGHTS)
+ >>> tokenizer = AutoTokenizer.from_pretrained(PATH_TO_CONVERTED_TOKENIZER)
+
+ >>> prompt = "Hey, are you conscious? Can you talk to me?"
+ >>> inputs = tokenizer(prompt, return_tensors="pt")
+
+ >>> # Generate
+ >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
+ >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+ "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
+ ```"""
+
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+
+ output_hidden_states = (
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+ )
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
+ outputs = self.model(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_values=past_key_values,
+ inputs_embeds=inputs_embeds,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ cache_position=cache_position,
+ token_idx=token_idx,
+ reuse_cache=reuse_cache,
+ flash_attention_recompute=flash_attention_recompute,
+ cache_idx=cache_idx,
+ )
+ hidden_states = outputs[0]
+ logits = self.lm_head(hidden_states)
+ logits = logits.float()
+
+ loss = None
+ if labels is not None:
+ # Shift so that tokens < n predict n
+ shift_logits = logits[..., :-1, :].contiguous()
+ shift_labels = labels[..., 1:].contiguous()
+ # Flatten the tokens
+ loss_fct = CrossEntropyLoss()
+ shift_logits = shift_logits.view(-1, self.config.vocab_size)
+ shift_labels = shift_labels.view(-1)
+ # Enable model parallelism
+ shift_labels = shift_labels.to(shift_logits.device)
+ loss = loss_fct(shift_logits, shift_labels)
+
+ # Move to same device for model parallelism.
+ aux_loss = sum([out.to(logits.device) for out in outputs[-1]])
+ if labels is not None:
+ loss += self.router_aux_loss_coef * aux_loss
+
+ if not return_dict:
+ output = (logits,) + outputs[1:]
+ # torch.distributed.barrier()
+ return (loss,) + output if loss is not None else output
+
+ return MoeCausalLMOutputWithPast(
+ loss=loss,
+ aux_loss=aux_loss,
+ logits=logits,
+ past_key_values=outputs.past_key_values,
+ hidden_states=outputs.hidden_states,
+ attentions=outputs.attentions,
+ )
+
+ def prepare_inputs_for_generation(
+ self,
+ input_ids,
+ past_key_values=None,
+ attention_mask=None,
+ inputs_embeds=None,
+ cache_position=None,
+ position_ids=None,
+ use_cache=True,
+ num_logits_to_keep=None,
+ **kwargs,
+ ):
+ """
+ Copied from GaudiMixtralForCausalLM in optimum/habana/transformers/models/mixtral/modeling_mixtral.py
+ """
+ reuse_cache = kwargs.get("reuse_cache")
+ token_idx = kwargs.get("token_idx", None)
+
+ # Omit tokens covered by past_key_values
+ if past_key_values is not None:
+ if token_idx is not None:
+ idx = token_idx + kwargs.get("inputs_embeds_offset", 0) - 1
+ input_ids = torch.index_select(input_ids, 1, idx)
+ else:
+ if inputs_embeds is not None: # Exception 1
+ input_ids = input_ids[:, -cache_position.shape[0] :]
+ elif (
+ input_ids.shape[1] != cache_position.shape[0]
+ ): # Default case (the "else", a no op, is Exception 2)
+ input_ids = input_ids[:, cache_position]
+ elif reuse_cache and token_idx is not None:
+ # With reuse_cache, KV cache is pre allocated hence for the 1st token we can slice the inputs till token idx for the fwd pass
+ input_ids = input_ids[:, :token_idx]
+ attention_mask = attention_mask[:, :token_idx]
+
+ if attention_mask is not None and position_ids is None:
+ # create position_ids on the fly for batch generation
+ position_ids = attention_mask.long().cumsum(-1) - 1
+ position_ids.masked_fill_(attention_mask == 0, 1)
+ if past_key_values:
+ if token_idx is not None:
+ position_ids = torch.index_select(position_ids, 1, token_idx - 1)
+ else:
+ position_ids = position_ids[:, -input_ids.shape[1] :]
+
+ # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
+ if inputs_embeds is not None and past_key_values is None:
+ model_inputs = {"inputs_embeds": inputs_embeds}
+ else:
+ model_inputs = {"input_ids": input_ids.contiguous()} # `contiguous()` needed for compilation use cases
+
+ if num_logits_to_keep is not None:
+ model_inputs["num_logits_to_keep"] = num_logits_to_keep
+
+ model_inputs.update(
+ {
+ "position_ids": position_ids,
+ "cache_position": cache_position,
+ "past_key_values": past_key_values,
+ "use_cache": use_cache,
+ "attention_mask": attention_mask,
+ "token_idx": token_idx,
+ "reuse_cache": reuse_cache,
+ "flash_attention_recompute": kwargs.get("flash_attention_recompute"),
+ "cache_idx": kwargs.get("cache_idx"),
+ }
+ )
+ return model_inputs
+
+ @staticmethod
+ def _reorder_cache(past_key_values, beam_idx):
+ reordered_past = ()
+ for layer_past in past_key_values:
+ reordered_past += (
+ tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
+ )
+ return reordered_past
+
+
+@add_start_docstrings(
+ """
+ The Arctic Model transformer with a sequence classification head on top (linear layer).
+
+ [`ArcticForSequenceClassification`] uses the last token in order to do the classification, as other causal models
+ (e.g. GPT-2) do.
+
+ Since it does classification on the last token, it requires to know the position of the last token. If a
+ `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If
+ no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the
+ padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take the last value in
+ each row of the batch).
+ """,
+ ARCTIC_START_DOCSTRING,
+)
+# Copied from transformers.models.llama.modeling_llama.LlamaForSequenceClassification with Llama->Arctic, LLAMA->MIXTRAL
+class ArcticForSequenceClassification(ArcticPreTrainedModel):
+ def __init__(self, config):
+ super().__init__(config)
+ self.num_labels = config.num_labels
+ self.model = ArcticModel(config)
+ self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)
+
+ # Initialize weights and apply final processing
+ self.post_init()
+
+ def get_input_embeddings(self):
+ return self.model.embed_tokens
+
+ def set_input_embeddings(self, value):
+ self.model.embed_tokens = value
+
+ @add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING)
+ def forward(
+ self,
+ input_ids: torch.LongTensor = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.LongTensor] = None,
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
+ inputs_embeds: Optional[torch.FloatTensor] = None,
+ labels: Optional[torch.LongTensor] = None,
+ use_cache: Optional[bool] = None,
+ output_attentions: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
+ r"""
+ labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+ Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+ config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
+ `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+ """
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+ transformer_outputs = self.model(
+ input_ids,
+ attention_mask=attention_mask,
+ position_ids=position_ids,
+ past_key_values=past_key_values,
+ inputs_embeds=inputs_embeds,
+ use_cache=use_cache,
+ output_attentions=output_attentions,
+ output_hidden_states=output_hidden_states,
+ return_dict=return_dict,
+ )
+ hidden_states = transformer_outputs[0]
+ logits = self.score(hidden_states)
+
+ if input_ids is not None:
+ batch_size = input_ids.shape[0]
+ else:
+ batch_size = inputs_embeds.shape[0]
+
+ if self.config.pad_token_id is None and batch_size != 1:
+ raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
+ if self.config.pad_token_id is None:
+ sequence_lengths = -1
+ else:
+ if input_ids is not None:
+ # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
+ sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
+ sequence_lengths = sequence_lengths % input_ids.shape[-1]
+ sequence_lengths = sequence_lengths.to(logits.device)
+ else:
+ sequence_lengths = -1
+
+ pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths]
+
+ loss = None
+ if labels is not None:
+ labels = labels.to(logits.device)
+ if self.config.problem_type is None:
+ if self.num_labels == 1:
+ self.config.problem_type = "regression"
+ elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
+ self.config.problem_type = "single_label_classification"
+ else:
+ self.config.problem_type = "multi_label_classification"
+
+ if self.config.problem_type == "regression":
+ loss_fct = MSELoss()
+ if self.num_labels == 1:
+ loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
+ else:
+ loss = loss_fct(pooled_logits, labels)
+ elif self.config.problem_type == "single_label_classification":
+ loss_fct = CrossEntropyLoss()
+ loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1))
+ elif self.config.problem_type == "multi_label_classification":
+ loss_fct = BCEWithLogitsLoss()
+ loss = loss_fct(pooled_logits, labels)
+ if not return_dict:
+ output = (pooled_logits,) + transformer_outputs[1:]
+ return ((loss,) + output) if loss is not None else output
+
+ return SequenceClassifierOutputWithPast(
+ loss=loss,
+ logits=pooled_logits,
+ past_key_values=transformer_outputs.past_key_values,
+ hidden_states=transformer_outputs.hidden_states,
+ attentions=transformer_outputs.attentions,
+ )
+
+
+# Copied from optimum.habana.transformers.models.llama.modeling_llama:apply_customized_rope()
+def apply_customized_rope(q, k, cos, sin, position_ids, training=True):
+ if q.device.type == "hpu" and FusedRoPE is not None:
+ return apply_customized_rope_module(q, k, cos, sin, position_ids, training)
+ else:
+ # keep the same implementation as Transformers v4.37.2
+ return apply_rotary_pos_emb(q, k, cos[position_ids], sin[position_ids], position_ids)
diff --git a/optimum/habana/transformers/models/snowflake/tokenization_arctic.py b/optimum/habana/transformers/models/snowflake/tokenization_arctic.py
new file mode 100644
index 0000000000..8fbe2463b6
--- /dev/null
+++ b/optimum/habana/transformers/models/snowflake/tokenization_arctic.py
@@ -0,0 +1,56 @@
+"""Tokenization classes for Arctic. Copied from https://huggingface.co/Snowflake/snowflake-arctic-instruct/tree/be318cae5aba5291208f27d30991a5150500887d."""
+
+from typing import Any, Dict, Optional
+
+from transformers.models.llama.tokenization_llama import LlamaTokenizer
+
+
+class ArcticTokenizer(LlamaTokenizer):
+ def __init__(
+ self,
+ vocab_file,
+ unk_token="<unk>",
+ bos_token="<s>",
+ eos_token="</s>",
+ pad_token=None,
+ sp_model_kwargs: Optional[Dict[str, Any]] = None,
+ add_bos_token=True,
+ add_eos_token=False,
+ clean_up_tokenization_spaces=False,
+ use_default_system_prompt=False,
+ spaces_between_special_tokens=False,
+ legacy=False,
+ add_prefix_space=True,
+ **kwargs,
+ ):
+ # Same as LlamaTokenizer except default legacy=False.
+ super().__init__(
+ vocab_file,
+ bos_token=bos_token,
+ eos_token=eos_token,
+ unk_token=unk_token,
+ pad_token=pad_token,
+ sp_model_kwargs=sp_model_kwargs,
+ add_bos_token=add_bos_token,
+ add_eos_token=add_eos_token,
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+ use_default_system_prompt=use_default_system_prompt,
+ spaces_between_special_tokens=spaces_between_special_tokens,
+ legacy=legacy,
+ add_prefix_space=add_prefix_space,
+ **kwargs,
+ )
+
+ @property
+ def default_chat_template(self):
+ """
+ This template formats inputs in the standard Arctic format.
+ """
+ return (
+ "{% for message in messages %}"
+ "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
+ "{% endfor %}"
+ "{% if add_generation_prompt %}"
+ "{{ '<|im_start|>assistant\n' }}"
+ "{% endif %}"
+ )
diff --git a/optimum/habana/transformers/models/video_llava/__init__.py b/optimum/habana/transformers/models/video_llava/__init__.py
index 5c5f894d56..57831502dc 100644
--- a/optimum/habana/transformers/models/video_llava/__init__.py
+++ b/optimum/habana/transformers/models/video_llava/__init__.py
@@ -1,2 +1 @@
from .modeling_video_llava import GaudiVideoLlavaForConditionalGeneration
-from .processing_video_llava import GaudiVideoLlavaProcessor
diff --git a/optimum/habana/transformers/models/video_llava/modeling_video_llava.py b/optimum/habana/transformers/models/video_llava/modeling_video_llava.py
index 6670b23375..ec9a10f053 100644
--- a/optimum/habana/transformers/models/video_llava/modeling_video_llava.py
+++ b/optimum/habana/transformers/models/video_llava/modeling_video_llava.py
@@ -18,148 +18,17 @@
import torch
from torch import nn
-from transformers.modeling_outputs import BaseModelOutputWithPooling
from transformers.models.video_llava.modeling_video_llava import (
VideoLlavaCausalLMOutputWithPast,
- VideoLlavaConfig,
VideoLlavaForConditionalGeneration,
)
-from transformers.utils import logging
+from transformers.utils import is_torchdynamo_compiling, logging
logger = logging.get_logger(__name__)
class GaudiVideoLlavaForConditionalGeneration(VideoLlavaForConditionalGeneration):
- def __init__(self, config: VideoLlavaConfig):
- super().__init__(config)
- self.feature_offset = 0
-
- def _merge_input_ids_with_visual_features(
- self, visual_features, inputs_embeds, input_ids, attention_mask, labels, token_idx, num_frames=1
- ):
- r"""
- Copied from VideoLlavaForConditionalGeneration._merge_input_ids_with_visual_features: https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/video_llava/modeling_video_llava.py
- The only differences are:
- - add new args token_idx
- - add self.feature_offset param
- """
- num_images, num_image_patches, embed_dim = visual_features.shape
- batch_size, sequence_length = input_ids.shape
- last_token_idx = token_idx + self.feature_offset
- left_padding = not torch.sum(input_ids[:, last_token_idx - 1] == torch.tensor(self.pad_token_id))
- special_vision_token = self.config.video_token_index if num_frames > 1 else self.config.image_token_index
-
- # 1. Create a mask to know where special image tokens are
- special_image_token_mask = input_ids == special_vision_token
- num_special_image_tokens = torch.sum(special_image_token_mask, dim=-1)
- # Compute the maximum embed dimension
- max_seq_len = (num_special_image_tokens.max() * (num_image_patches * num_frames - 1)) + sequence_length
- self.feature_offset = self.feature_offset + max_seq_len - sequence_length
- batch_indices, non_image_indices = torch.where(input_ids != special_vision_token)
-
- # 2. Compute the positions where text should be written
- # Calculate new positions for text tokens in merged image-text sequence.
- # `special_image_token_mask` identifies image tokens. Each image token will be replaced by `nb_text_tokens_per_images - 1` text tokens.
- # `torch.cumsum` computes how each image token shifts subsequent text token positions.
- # - 1 to adjust for zero-based indexing, as `cumsum` inherently increases indices by one.
- new_token_positions = (
- torch.cumsum((special_image_token_mask * (num_image_patches * num_frames - 1) + 1), dim=-1) - 1
- )
- nb_image_pad = max_seq_len - 1 - new_token_positions[:, -1]
- if left_padding:
- new_token_positions += nb_image_pad[:, None] # offset for left padding
- text_to_overwrite = new_token_positions[batch_indices, non_image_indices]
-
- # 3. Create the full embedding, already padded to the maximum position
- # expand input ids so that the second "merge" with videos does not fail
- final_embedding = torch.zeros(
- batch_size, max_seq_len, embed_dim, dtype=inputs_embeds.dtype, device=inputs_embeds.device
- )
- final_attention_mask = torch.zeros(
- batch_size, max_seq_len, dtype=attention_mask.dtype, device=inputs_embeds.device
- )
- final_input_ids = torch.full(
- (batch_size, max_seq_len), self.pad_token_id, dtype=input_ids.dtype, device=inputs_embeds.device
- )
- # In case the Vision model or the Language model has been offloaded to CPU, we need to manually
- # set the corresponding tensors into their correct target device.
- target_device = inputs_embeds.device
- batch_indices, non_image_indices, text_to_overwrite = (
- batch_indices.to(target_device),
- non_image_indices.to(target_device),
- text_to_overwrite.to(target_device),
- )
- attention_mask = attention_mask.to(target_device)
-
- # 4. Fill the embeddings based on the mask. If we have ["hey" "", "how", "are"]
- # we need to index copy on [0, 577, 578, 579] for the text and [1:576] for the image features
- final_embedding[batch_indices, text_to_overwrite] = inputs_embeds[batch_indices, non_image_indices]
- final_attention_mask[batch_indices, text_to_overwrite] = attention_mask[batch_indices, non_image_indices]
- final_input_ids[batch_indices, text_to_overwrite] = input_ids[batch_indices, non_image_indices]
- if labels is not None:
- final_labels = torch.full(
- (batch_size, max_seq_len), self.config.ignore_index, dtype=input_ids.dtype, device=input_ids.device
- )
- final_labels[batch_indices, text_to_overwrite] = labels[batch_indices, non_image_indices]
- else:
- final_labels = None
-
- # 5. Fill the embeddings corresponding to the images. Anything that is still zeros needs filling
- image_to_overwrite = torch.full((batch_size, max_seq_len), True, dtype=torch.bool, device=inputs_embeds.device)
- image_to_overwrite[batch_indices, text_to_overwrite] = False
- image_to_overwrite &= image_to_overwrite.cumsum(-1) - 1 >= nb_image_pad[:, None].to(target_device)
-
- if image_to_overwrite.sum() != visual_features.shape[:-1].numel():
- visual_type = "videos" if num_frames == 8 else "images"
- num_images //= num_frames
- raise ValueError(
- f"The input provided to the model are wrong. The number of {visual_type} tokens is {torch.sum(special_image_token_mask)} while"
- f" the number of {visual_type} given to the model is {num_images}. This prevents correct indexing and breaks batch generation."
- )
-
- final_embedding[image_to_overwrite] = visual_features.contiguous().reshape(-1, embed_dim).to(target_device)
- final_attention_mask |= image_to_overwrite
- position_ids = (final_attention_mask.cumsum(-1) - 1).masked_fill_((final_attention_mask == 0), 1)
-
- return final_embedding, final_attention_mask, final_labels, position_ids, final_input_ids
-
- def _get_vision_features(
- self,
- pixel_values_images: Optional[torch.FloatTensor] = None,
- pixel_values_videos: Optional[torch.FloatTensor] = None,
- vision_feature_layer: Optional[int] = None,
- vision_feature_select_strategy: Optional[str] = None,
- ) -> Union[Tuple, BaseModelOutputWithPooling]:
- if pixel_values_images is None and pixel_values_videos is None:
- raise ValueError("You have to specify `pixel_values_images` or `pixel_values_videos`")
-
- # videos do not need to select features and it's always "full" (as it is done in the orig implementation)
- if pixel_values_videos is not None:
- batch_size_vid, num_frames, channels, height, width = pixel_values_videos.shape
-
- pixel_values = pixel_values_videos.reshape(batch_size_vid * num_frames, channels, height, width)
- video_outputs = self.video_tower(pixel_values, output_hidden_states=True)
- video_outputs = video_outputs.hidden_states[vision_feature_layer].squeeze(1)
- else:
- video_outputs = None
- num_frames = 0
-
- if pixel_values_images is not None:
- image_outputs = self.image_tower(pixel_values_images, output_hidden_states=True)
- image_outputs = image_outputs.hidden_states[vision_feature_layer].squeeze(1)
-
- if vision_feature_select_strategy == "default":
- image_outputs = image_outputs[:, 1:]
- elif vision_feature_select_strategy == "full":
- image_outputs = image_outputs
- else:
- raise ValueError(f"Unexpected select feature strategy: {self.config.vision_feature_select_strategy}")
- else:
- image_outputs = None
-
- return image_outputs, video_outputs, num_frames
-
def forward(
self,
input_ids: Optional[torch.LongTensor] = None,
@@ -179,18 +48,12 @@ def forward(
cache_position: Optional[torch.LongTensor] = None,
logits_to_keep: Union[int, torch.Tensor] = 0,
token_idx: Optional[torch.Tensor] = None,
- **kwargs,
+ **lm_kwargs,
) -> Union[Tuple, VideoLlavaCausalLMOutputWithPast]:
r"""
- Copied from VideoLlavaForConditionalGeneration.forward: https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/video_llava/modeling_video_llava.py
- The only differences are:
+ Copied from VideoLlavaForConditionalGeneration.forward: https://github.com/huggingface/transformers/blob/v4.51.3/src/transformers/models/video_llava/modeling_video_llava.py#L365
+ The only difference is:
- add new args token_idx
- - add new args attn_softmax_bf16
- - add new args reuse_cache
- - add new args use_flash_attention
- - add new args flash_attention_recompute
- - add new args flash_attention_causal_mask
- - add new args flash_attention_fast_softmax
"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -198,6 +61,59 @@ def forward(
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+ vision_feature_layer = (
+ vision_feature_layer if vision_feature_layer is not None else self.config.vision_feature_layer
+ )
+ vision_feature_select_strategy = (
+ vision_feature_select_strategy
+ if vision_feature_select_strategy is not None
+ else self.config.vision_feature_select_strategy
+ )
+
+ if (input_ids is None) ^ (inputs_embeds is not None):
+ raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
+
+ if (pixel_values_images is not None or pixel_values_videos is not None) and inputs_embeds is not None:
+ raise ValueError(
+ "You cannot specify both `pixel_values_images`/`pixel_values_videos` and `inputs_embeds` at the same "
+ "time, and must specify either one"
+ )
+
+ if inputs_embeds is None:
+ inputs_embeds = self.get_input_embeddings()(input_ids)
+
+ if pixel_values_images is not None:
+ image_features = self.get_image_features(
+ pixel_values_images,
+ vision_feature_layer=vision_feature_layer,
+ vision_feature_select_strategy=vision_feature_select_strategy,
+ )
+ special_image_mask = (input_ids == self.config.image_token_index).unsqueeze(-1)
+ special_image_mask = special_image_mask.expand_as(inputs_embeds).to(inputs_embeds.device)
+ if not is_torchdynamo_compiling() and inputs_embeds[special_image_mask].numel() != image_features.numel():
+ n_image_tokens = (input_ids == self.config.image_token_index).sum()
+ n_image_features = image_features.shape[0] * image_features.shape[1]
+ raise ValueError(
+ f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}"
+ )
+ image_features = image_features.to(inputs_embeds.device, inputs_embeds.dtype)
+ inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_features)
+
+ if pixel_values_videos is not None:
+ video_features, num_frames = self.get_video_features(
+ pixel_values_videos=pixel_values_videos, vision_feature_layer=vision_feature_layer
+ )
+
+ special_image_mask = (input_ids == self.config.video_token_index).unsqueeze(-1)
+ special_image_mask = special_image_mask.expand_as(inputs_embeds).to(inputs_embeds.device)
+ if not is_torchdynamo_compiling() and inputs_embeds[special_image_mask].numel() != video_features.numel():
+ n_video_tokens = (input_ids == self.config.video_token_index).sum()
+ n_video_features = video_features.shape[0] * video_features.shape[1]
+ raise ValueError(
+ f"Video features and video tokens do not match: tokens: {n_video_tokens}, features {n_video_features}"
+ )
+ video_features = video_features.to(inputs_embeds.device, inputs_embeds.dtype)
+ inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, video_features)
outputs = self.language_model(
attention_mask=attention_mask,
@@ -209,14 +125,12 @@ def forward(
output_hidden_states=output_hidden_states,
return_dict=return_dict,
cache_position=cache_position,
- logits_to_keep=0,
+ logits_to_keep=logits_to_keep,
token_idx=token_idx,
- **kwargs,
+ **lm_kwargs,
)
logits = outputs[0]
- if logits.shape[1] > 1:
- logits = logits[:, self.feature_offset :, :]
loss = None
if labels is not None:
@@ -246,196 +160,6 @@ def forward(
past_key_values=outputs.past_key_values,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
- image_hidden_states=kwargs.get("image_features", None) if pixel_values_images is not None else None,
- video_hidden_states=kwargs.get("video_features", None) if pixel_values_videos is not None else None,
- )
-
- def prepare_inputs_for_generation(
- self,
- input_ids,
- past_key_values=None,
- inputs_embeds=None,
- pixel_values_images=None,
- pixel_values_videos=None,
- attention_mask=None,
- cache_position=None,
- logits_to_keep=None,
- **kwargs,
- ):
- token_idx = kwargs.get("token_idx", None)
- if token_idx is None:
- return super().prepare_inputs_for_generation(
- input_ids=input_ids,
- past_key_values=past_key_values,
- inputs_embeds=inputs_embeds,
- pixel_values_images=pixel_values_images,
- pixel_values_videos=pixel_values_videos,
- attention_mask=attention_mask,
- cache_position=cache_position,
- logits_to_keep=logits_to_keep,
- **kwargs,
- )
- # Else, we need to update token_idx when merging features from videos/images with input embeddings
- labels = kwargs.get("labels", None)
- if (input_ids is None) ^ (inputs_embeds is not None):
- raise ValueError(
- "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
- )
-
- if (pixel_values_images is not None or pixel_values_videos is not None) and inputs_embeds is not None:
- raise ValueError(
- "You cannot specify both pixel_values and inputs_embeds at the same time, and must specify either one"
- )
-
- legacy_processing = False
- inputs_not_expanded = False
- if input_ids is not None:
- img_token_not_enough = (input_ids == self.config.image_token_index).sum(
- 1
- ).max() < self.config.image_seq_length
- video_token_not_enough = (input_ids == self.config.video_token_index).sum(
- 1
- ).max() < self.config.video_seq_length
- # if the number of image/video tokens is more than image embeddings seq length, then prob we expanded it in processing
- # not very reliable, but we don't expect one to actually pass 500+ images for one prompt
- inputs_not_expanded = (img_token_not_enough and pixel_values_images is not None) or (
- video_token_not_enough and pixel_values_videos is not None
- )
- model_inputs = self.language_model.prepare_inputs_for_generation(
- input_ids,
- past_key_values=past_key_values,
- inputs_embeds=inputs_embeds,
- attention_mask=attention_mask,
- cache_position=cache_position,
- logits_to_keep=logits_to_keep,
- **kwargs,
- )
- position_ids = model_inputs["position_ids"]
- cache_position = model_inputs["cache_position"]
- attention_mask = model_inputs["attention_mask"]
- inputs_embeds = model_inputs.get("inputs_embeds", None)
- input_ids = model_inputs.get("input_ids", None)
-
- if inputs_embeds is None:
- inputs_embeds = self.get_input_embeddings()(input_ids)
- pixels_present = input_ids.shape[-1] == 1 and (
- pixel_values_images is not None or pixel_values_videos is not None
- )
- legacy_processing = inputs_not_expanded or pixels_present
-
- vision_feature_layer = kwargs.get("vision_feature_layer", None)
- vision_feature_layer = (
- vision_feature_layer if vision_feature_layer is not None else self.config.vision_feature_layer
- )
- vision_feature_select_strategy = kwargs.get("vision_feature_select_strategy", None)
- vision_feature_select_strategy = (
- vision_feature_select_strategy
- if vision_feature_select_strategy is not None
- else self.config.vision_feature_select_strategy
- )
- if pixel_values_images is not None or pixel_values_videos is not None:
- image_outputs, video_outputs, num_frames = self._get_vision_features(
- pixel_values_images=pixel_values_images,
- pixel_values_videos=pixel_values_videos,
- vision_feature_layer=vision_feature_layer,
- vision_feature_select_strategy=vision_feature_select_strategy,
- )
-
- image_features = video_features = None
- if image_outputs is not None:
- image_features = self.multi_modal_projector(image_outputs)
- if video_outputs is not None:
- video_features = self.multi_modal_projector(video_outputs)
-
- if legacy_processing:
- logger.warning_once(
- "Expanding inputs for image tokens in Video-LLaVa should be done in processing. "
- "Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly "
- "with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. "
- "Using processors without these attributes in the config is deprecated and will throw an error in v4.47."
- )
- if input_ids.shape[1] != 1:
- self.feature_offset = 0
- for features, frames in ((image_features, 1), (video_features, num_frames)):
- if features is not None:
- (
- inputs_embeds,
- attention_mask,
- labels,
- position_ids,
- input_ids,
- ) = self._merge_input_ids_with_visual_features(
- features,
- inputs_embeds,
- input_ids,
- attention_mask,
- labels,
- token_idx,
- num_frames=frames,
- )
- cache_position = torch.arange(attention_mask.shape[1], device=attention_mask.device)
- else:
- # Retrieve the first layer to inspect the logits and mask out the hidden states
- # that are set to 0
- first_layer_past_key_value = past_key_values[0][0][:, :, :, 0]
-
- # Sum all dimensions of head_dim (-2) to avoid random errors such as: https://github.com/huggingface/transformers/pull/28032#issuecomment-1863691941
- batch_index, non_attended_tokens = torch.where(first_layer_past_key_value.float().sum(-2) == 0)
-
- target_length = input_ids.shape[1]
- past_length = first_layer_past_key_value.shape[-1]
-
- extended_attention_mask = torch.ones(
- (attention_mask.shape[0], past_length),
- dtype=attention_mask.dtype,
- device=attention_mask.device,
- )
-
- # Filter out only the tokens that can be un-attended, this can happen
- # if one uses Llava + Fused modules where the cache on the
- # first iteration is already big enough, or if one passes custom cache
- valid_indices = non_attended_tokens < extended_attention_mask.size(-1)
- new_batch_index = batch_index[valid_indices]
- new_non_attended_tokens = non_attended_tokens[valid_indices]
-
- # Zero-out the places where we don't need to attend
- extended_attention_mask[new_batch_index, new_non_attended_tokens] = 0
- new_token_idx = token_idx + self.feature_offset
- extended_attention_mask[:, new_token_idx - 1 + target_length :] = 0
- attention_mask = extended_attention_mask.clone()
- position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1
- cache_position = new_token_idx
-
- # TODO: @raushan retain only the new behavior after v4.47
- else:
- if image_outputs is not None:
- special_image_mask = (
- (input_ids == self.config.image_token_index).unsqueeze(-1).expand_as(inputs_embeds)
- )
- image_features = image_features.to(inputs_embeds.device, inputs_embeds.dtype)
- inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_features)
-
- if video_outputs is not None:
- special_image_mask = (
- (input_ids == self.config.video_token_index).unsqueeze(-1).expand_as(inputs_embeds)
- )
- video_features = video_features.to(inputs_embeds.device, inputs_embeds.dtype)
- inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, video_features)
-
- model_inputs.update(
- {
- "position_ids": position_ids,
- "cache_position": cache_position,
- "attention_mask": attention_mask,
- "token_idx": token_idx + self.feature_offset,
- "inputs_embeds": inputs_embeds,
- }
+ image_hidden_states=image_features if pixel_values_images is not None else None,
+ video_hidden_states=video_features if pixel_values_videos is not None else None,
)
- if legacy_processing or (cache_position is not None and cache_position[0]) == 0:
- # If we're in cached decoding stage, pixel values should be None because input ids do not contain special image token anymore
- # Otherwise we need pixel values to be passed to model
- model_inputs["pixel_values_images"] = pixel_values_images
- model_inputs["pixel_values_videos"] = pixel_values_videos
- model_inputs["image_features"] = image_features
- model_inputs["video_features"] = video_features
- return model_inputs
diff --git a/optimum/habana/transformers/models/video_llava/processing_video_llava.py b/optimum/habana/transformers/models/video_llava/processing_video_llava.py
deleted file mode 100644
index 9ab480220c..0000000000
--- a/optimum/habana/transformers/models/video_llava/processing_video_llava.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from typing import List, Optional, Union
-
-from transformers.image_processing_utils import BatchFeature
-from transformers.image_utils import ImageInput, get_image_size, to_numpy_array
-from transformers.models.video_llava.processing_video_llava import VideoLlavaProcessor
-from transformers.tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy
-from transformers.utils import TensorType
-
-from optimum.utils import logging
-
-
-logger = logging.get_logger(__name__)
-
-
-class GaudiVideoLlavaProcessor(VideoLlavaProcessor):
- attributes = ["image_processor", "tokenizer"]
- valid_kwargs = ["chat_template", "patch_size", "vision_feature_select_strategy", "image_token", "video_token"]
- image_processor_class = "VideoLlavaImageProcessor"
- tokenizer_class = "AutoTokenizer"
-
- def __init__(
- self,
- image_processor=None,
- tokenizer=None,
- patch_size=None,
- vision_feature_select_strategy=None,
- image_token="", # set the default and let users change if they have peculiar special tokens in rare cases
- video_token="