From da58a2c379aa744919f22096d24a71163661a43f Mon Sep 17 00:00:00 2001
From: Chen Yang <2082464740@qq.com>
Date: Thu, 2 Apr 2026 12:43:11 +0800
Subject: [PATCH 01/10] Fix: DreamID-Omni

Signed-off-by: Chen Yang <2082464740@qq.com>
---
 .../x_to_video_audio/download_dreamid_omni.py      | 14 ++++++++++++--
 .../x_to_video_audio/x_to_video_audio.md           |  4 +++-
 .../x_to_video_audio/x_to_video_audio.py           |  4 ++--
 vllm_omni/entrypoints/async_omni_diffusion.py      |  8 +++++++-
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
index 0dbf402e9e3..fb97da040d2 100644
--- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
+++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
@@ -7,7 +7,7 @@
 import time
 from pathlib import Path
 
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download,hf_hub_download
 
 DEPENDENCY_REPO = "https://github.com/bytedance/DreamID-V.git"
 DEPENDENCY_BRANCH = "omni"
@@ -89,7 +89,17 @@ def main(output_dir: str):
         json.dump(data, f, indent=2)
 
     print(f"model_index.json created at {os.path.join(output_dir, 'model_index.json')}")
-
+    # Download transformer config for Wan2.2-TI2V-5B from HuggingFace
+    transformer_dir = os.path.join(output_dir, "transformer")
+    os.makedirs(transformer_dir, exist_ok=True)
+
+    print("Downloading transformer/config.json from Wan-AI/Wan2.2-TI2V-5B-Diffusers...")
+    transformer_config_path = hf_hub_download(
+        repo_id="Wan-AI/Wan2.2-TI2V-5B-Diffusers",
+        filename="transformer/config.json",
+        local_dir=output_dir,
+        local_dir_use_symlinks=False,
+    )
     # now we download the dependency code
     download_dependency()
 
diff --git a/examples/offline_inference/x_to_video_audio/x_to_video_audio.md b/examples/offline_inference/x_to_video_audio/x_to_video_audio.md
index 59b993a728d..4b5188f41b2 100644
--- a/examples/offline_inference/x_to_video_audio/x_to_video_audio.md
+++ b/examples/offline_inference/x_to_video_audio/x_to_video_audio.md
@@ -24,7 +24,9 @@ dreamid_omni/
 │   ├── models_t5_umt5-xxl-enc-bf16.pth
 │   ├── Wan2.2_VAE.pth
 │
-├── model_index.json # create by download_dreamid_omni.py
+├── model_index.json
+└── transformer/
+    └── config.json   # created by download_dreamid_omni.py
 ```
 
 ### Run the Inference
diff --git a/examples/offline_inference/x_to_video_audio/x_to_video_audio.py b/examples/offline_inference/x_to_video_audio/x_to_video_audio.py
index 17d0f06c3c5..e0424add69b 100644
--- a/examples/offline_inference/x_to_video_audio/x_to_video_audio.py
+++ b/examples/offline_inference/x_to_video_audio/x_to_video_audio.py
@@ -132,8 +132,8 @@ def main() -> None:
     if not outputs:
         raise RuntimeError("No output returned from DreamID-Omni.")
     output = outputs[0].request_output
-    generated_video = output[0].images[0][0]
-    generated_audio = output[0].images[0][1]
+    generated_video = output.images[0][0]
+    generated_audio = output.images[0][1]
     try:
         from dreamid_omni.utils.io_utils import save_video
     except Exception as e:
diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py
index 674c3509d22..5f0f682f868 100644
--- a/vllm_omni/entrypoints/async_omni_diffusion.py
+++ b/vllm_omni/entrypoints/async_omni_diffusion.py
@@ -110,6 +110,12 @@ def __init__(
             if config_dict is not None:
                 if od_config.model_class_name is None:
                     od_config.model_class_name = config_dict.get("_class_name", None)
+                # Populate model_config with the contents of model_index.json
+                # This includes model-specific keys like "fusion" for DreamID-Omni
+                if not od_config.model_config:
+                    # Remove _class_name as it's already stored in model_class_name
+                    model_config = {k: v for k, v in config_dict.items() if k != "_class_name"}
+                    od_config.model_config = model_config
                 od_config.update_multimodal_support()
 
                 tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model)
@@ -470,4 +476,4 @@ async def profile(self, is_start: bool = True, profile_prefix: str | None = None
             self.engine.profile,
             is_start,
             profile_prefix,
-        )
+        )
\ No newline at end of file

From 09a2e96f408a018e7829eee41aff37ddba1bb512 Mon Sep 17 00:00:00 2001
From: Chen Yang <2082464740@qq.com>
Date: Thu, 2 Apr 2026 12:55:19 +0800
Subject: [PATCH 02/10] Fix pre-commit

Signed-off-by: Chen Yang <2082464740@qq.com>
---
 .../x_to_video_audio/download_dreamid_omni.py                 | 4 ++--
 vllm_omni/entrypoints/async_omni_diffusion.py                 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
index fb97da040d2..453e269894c 100644
--- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
+++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
@@ -7,7 +7,7 @@
 import time
 from pathlib import Path
 
-from huggingface_hub import snapshot_download,hf_hub_download
+from huggingface_hub import hf_hub_download, snapshot_download
 
 DEPENDENCY_REPO = "https://github.com/bytedance/DreamID-V.git"
 DEPENDENCY_BRANCH = "omni"
@@ -94,7 +94,7 @@ def main(output_dir: str):
     os.makedirs(transformer_dir, exist_ok=True)
 
     print("Downloading transformer/config.json from Wan-AI/Wan2.2-TI2V-5B-Diffusers...")
-    transformer_config_path = hf_hub_download(
+    hf_hub_download(
         repo_id="Wan-AI/Wan2.2-TI2V-5B-Diffusers",
         filename="transformer/config.json",
         local_dir=output_dir,
diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py
index 5f0f682f868..c714b633906 100644
--- a/vllm_omni/entrypoints/async_omni_diffusion.py
+++ b/vllm_omni/entrypoints/async_omni_diffusion.py
@@ -476,4 +476,4 @@ async def profile(self, is_start: bool = True, profile_prefix: str | None = None
             self.engine.profile,
             is_start,
             profile_prefix,
-        )
\ No newline at end of file
+        )

From ea9403bc026dc84db854da3f97ee03df5b34ed08 Mon Sep 17 00:00:00 2001
From: Chen Yang <2082464740@qq.com>
Date: Thu, 2 Apr 2026 19:08:04 +0800
Subject: [PATCH 03/10] Fix: write transformer/config.json locally instead of downloading it

Signed-off-by: Chen Yang <2082464740@qq.com>
---
 .../x_to_video_audio/download_dreamid_omni.py    | 16 ++++++----------
 vllm_omni/entrypoints/async_omni_diffusion.py    | 10 ++++------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
index 453e269894c..4cd076c0687 100644
--- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
+++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
@@ -7,7 +7,7 @@
 import time
 from pathlib import Path
 
-from huggingface_hub import hf_hub_download, snapshot_download
+from huggingface_hub import snapshot_download
 
 DEPENDENCY_REPO = "https://github.com/bytedance/DreamID-V.git"
 DEPENDENCY_BRANCH = "omni"
@@ -89,17 +89,13 @@ def main(output_dir: str):
         json.dump(data, f, indent=2)
 
     print(f"model_index.json created at {os.path.join(output_dir, 'model_index.json')}")
-    # Download transformer config for Wan2.2-TI2V-5B from HuggingFace
+    
     transformer_dir = os.path.join(output_dir, "transformer")
     os.makedirs(transformer_dir, exist_ok=True)
+    with open(os.path.join(transformer_dir, "config.json"), "w", encoding="utf-8") as f:
+        json.dump({}, f)
+    print(f"transformer/config.json created at {os.path.join(transformer_dir, 'config.json')}")
 
-    print("Downloading transformer/config.json from Wan-AI/Wan2.2-TI2V-5B-Diffusers...")
-    hf_hub_download(
-        repo_id="Wan-AI/Wan2.2-TI2V-5B-Diffusers",
-        filename="transformer/config.json",
-        local_dir=output_dir,
-        local_dir_use_symlinks=False,
-    )
     # now we download the dependency code
     download_dependency()
 
@@ -110,4 +106,4 @@ def main(output_dir: str):
         "--output-dir", type=str, default="./dreamid_omni", help="Base directory to save downloaded models"
     )
     args = parser.parse_args()
-    main(args.output_dir)
+    main(args.output_dir)
\ No newline at end of file
diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py
index c714b633906..e394f020e23 100644
--- a/vllm_omni/entrypoints/async_omni_diffusion.py
+++ b/vllm_omni/entrypoints/async_omni_diffusion.py
@@ -110,16 +110,14 @@ def __init__(
             if config_dict is not None:
                 if od_config.model_class_name is None:
                     od_config.model_class_name = config_dict.get("_class_name", None)
-                # Populate model_config with the contents of model_index.json
-                # This includes model-specific keys like "fusion" for DreamID-Omni
-                if not od_config.model_config:
-                    # Remove _class_name as it's already stored in model_class_name
+
+                if od_config.model_class_name == "DreamIDOmniPipeline":
                     model_config = {k: v for k, v in config_dict.items() if k != "_class_name"}
                     od_config.model_config = model_config
                 od_config.update_multimodal_support()
 
                 tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model)
-                od_config.tf_model_config = TransformerConfig.from_dict(tf_config_dict)
+                od_config.set_tf_model_config(TransformerConfig.from_dict(tf_config_dict))
             else:
                 raise FileNotFoundError("model_index.json not found")
         except (AttributeError, OSError, ValueError, FileNotFoundError):
@@ -127,7 +125,7 @@ def __init__(
             if cfg is None:
                 raise ValueError(f"Could not find config.json or model_index.json for model {od_config.model}")
 
-            od_config.tf_model_config = TransformerConfig.from_dict(cfg)
+            od_config.set_tf_model_config(TransformerConfig.from_dict(cfg))
             model_type = cfg.get("model_type")
             architectures = cfg.get("architectures") or []
             # Bagel/NextStep models don't have a model_index.json, so we set the pipeline class name manually

From 3ee65b1206c7e949ac539ca5d32a9773a092d8a3 Mon Sep 17 00:00:00 2001
From: Chen Yang <2082464740@qq.com>
Date: Thu, 2 Apr 2026 19:29:23 +0800
Subject: [PATCH 04/10] Fix: revert to direct tf_model_config assignment

Signed-off-by: Chen Yang <2082464740@qq.com>
---
 vllm_omni/entrypoints/async_omni_diffusion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py
index e394f020e23..26d4ddcb84c 100644
--- a/vllm_omni/entrypoints/async_omni_diffusion.py
+++ b/vllm_omni/entrypoints/async_omni_diffusion.py
@@ -117,7 +117,7 @@ def __init__(
                 od_config.update_multimodal_support()
 
                 tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model)
-                od_config.set_tf_model_config(TransformerConfig.from_dict(tf_config_dict))
+                od_config.tf_model_config = TransformerConfig.from_dict(tf_config_dict)
             else:
                 raise FileNotFoundError("model_index.json not found")
         except (AttributeError, OSError, ValueError, FileNotFoundError):
@@ -125,7 +125,7 @@ def __init__(
             if cfg is None:
                 raise ValueError(f"Could not find config.json or model_index.json for model {od_config.model}")
 
-            od_config.set_tf_model_config(TransformerConfig.from_dict(cfg))
+            od_config.tf_model_config = TransformerConfig.from_dict(cfg)
             model_type = cfg.get("model_type")
             architectures = cfg.get("architectures") or []
             # Bagel/NextStep models don't have a model_index.json, so we set the pipeline class name manually

From 84750912c27845355d5907dc1e78f34bb716a453 Mon Sep 17 00:00:00 2001
From: Chen Yang <2082464740@qq.com>
Date: Fri, 3 Apr 2026 08:28:37 +0800
Subject: [PATCH 05/10] Fix whitespace and EOF newline in download_dreamid_omni.py

Signed-off-by: Chen Yang <2082464740@qq.com>
---
 .../x_to_video_audio/download_dreamid_omni.py                 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
index 4cd076c0687..5a106688814 100644
--- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
+++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
@@ -89,7 +89,7 @@ def main(output_dir: str):
         json.dump(data, f, indent=2)
 
     print(f"model_index.json created at {os.path.join(output_dir, 'model_index.json')}")
-    
+
     transformer_dir = os.path.join(output_dir, "transformer")
     os.makedirs(transformer_dir, exist_ok=True)
     with open(os.path.join(transformer_dir, "config.json"), "w", encoding="utf-8") as f:
@@ -106,4 +106,4 @@ def main(output_dir: str):
         "--output-dir", type=str, default="./dreamid_omni", help="Base directory to save downloaded models"
     )
     args = parser.parse_args()
-    main(args.output_dir)
\ No newline at end of file
+    main(args.output_dir)

From 78e9f49a4462a49580395846311927012a9f33f6 Mon Sep 17 00:00:00 2001
From: erfgss <97771661+erfgss@users.noreply.github.com>
Date: Fri, 3 Apr 2026 09:30:43 +0800
Subject: [PATCH 06/10] Refactor model configuration assignment logic

Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com>
---
 vllm_omni/entrypoints/async_omni_diffusion.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py
index 26d4ddcb84c..7df7cc415ba 100644
--- a/vllm_omni/entrypoints/async_omni_diffusion.py
+++ b/vllm_omni/entrypoints/async_omni_diffusion.py
@@ -110,10 +110,7 @@ def __init__(
             if config_dict is not None:
                 if od_config.model_class_name is None:
                     od_config.model_class_name = config_dict.get("_class_name", None)
-
-                if od_config.model_class_name == "DreamIDOmniPipeline":
-                    model_config = {k: v for k, v in config_dict.items() if k != "_class_name"}
-                    od_config.model_config = model_config
+                    od_config.model_config = {k: v for k, v in config_dict.items() if k != "_class_name"}
                 od_config.update_multimodal_support()
 
                 tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model)

From 97599d242beb7939b0c8d82225811e706ed870ae Mon Sep 17 00:00:00 2001
From: Chen Yang <2082464740@qq.com>
Date: Fri, 3 Apr 2026 15:01:46 +0800
Subject: [PATCH 07/10] Move fusion checkpoint path into transformer/config.json

Signed-off-by: Chen Yang <2082464740@qq.com>
---
 .../x_to_video_audio/download_dreamid_omni.py         |  3 +--
 vllm_omni/entrypoints/async_omni_diffusion.py         | 11 ++++-------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
index 5a106688814..2f66d5f7789 100644
--- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
+++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
@@ -82,7 +82,6 @@ def main(output_dir: str):
 
     data = {
         "_class_name": "DreamIDOmniPipeline",
-        "fusion": "DreamID-Omni/dreamid_omni.safetensors",
     }
 
     with open(os.path.join(output_dir, "model_index.json"), "w", encoding="utf-8") as f:
@@ -93,7 +92,7 @@ def main(output_dir: str):
     transformer_dir = os.path.join(output_dir, "transformer")
     os.makedirs(transformer_dir, exist_ok=True)
     with open(os.path.join(transformer_dir, "config.json"), "w", encoding="utf-8") as f:
-        json.dump({}, f)
+        json.dump({"fusion": "DreamID-Omni/dreamid_omni.safetensors"}, f)
     print(f"transformer/config.json created at {os.path.join(transformer_dir, 'config.json')}")
 
     # now we download the dependency code
diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py
index 26d4ddcb84c..a0c9fbfc5a6 100644
--- a/vllm_omni/entrypoints/async_omni_diffusion.py
+++ b/vllm_omni/entrypoints/async_omni_diffusion.py
@@ -110,14 +110,11 @@ def __init__(
             if config_dict is not None:
                 if od_config.model_class_name is None:
                     od_config.model_class_name = config_dict.get("_class_name", None)
-
-                if od_config.model_class_name == "DreamIDOmniPipeline":
-                    model_config = {k: v for k, v in config_dict.items() if k != "_class_name"}
-                    od_config.model_config = model_config
                 od_config.update_multimodal_support()
 
                 tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model)
-                od_config.tf_model_config = TransformerConfig.from_dict(tf_config_dict)
+                od_config.set_tf_model_config(TransformerConfig.from_dict(tf_config_dict))
+                od_config.model_config = tf_config_dict
             else:
                 raise FileNotFoundError("model_index.json not found")
         except (AttributeError, OSError, ValueError, FileNotFoundError):
@@ -125,7 +122,7 @@ def __init__(
             if cfg is None:
                 raise ValueError(f"Could not find config.json or model_index.json for model {od_config.model}")
 
-            od_config.tf_model_config = TransformerConfig.from_dict(cfg)
+            od_config.set_tf_model_config(TransformerConfig.from_dict(cfg))
             model_type = cfg.get("model_type")
             architectures = cfg.get("architectures") or []
             # Bagel/NextStep models don't have a model_index.json, so we set the pipeline class name manually
@@ -474,4 +471,4 @@ async def profile(self, is_start: bool = True, profile_prefix: str | None = None
             self.engine.profile,
             is_start,
             profile_prefix,
-        )
+        )
\ No newline at end of file

From d1fcc5422595fd3f642d1069b00f9640dd1f9287 Mon Sep 17 00:00:00 2001
From: erfgss <97771661+erfgss@users.noreply.github.com>
Date: Fri, 3 Apr 2026 15:11:38 +0800
Subject: [PATCH 08/10] Update async_omni_diffusion.py

Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com>
---
 vllm_omni/entrypoints/async_omni_diffusion.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py
index bad90698fd1..e92eb0bdf56 100644
--- a/vllm_omni/entrypoints/async_omni_diffusion.py
+++ b/vllm_omni/entrypoints/async_omni_diffusion.py
@@ -110,7 +110,6 @@ def __init__(
             if config_dict is not None:
                 if od_config.model_class_name is None:
                     od_config.model_class_name = config_dict.get("_class_name", None)
-                    od_config.model_config = {k: v for k, v in config_dict.items() if k != "_class_name"}
                 od_config.update_multimodal_support()
 
                 tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model)
@@ -472,4 +471,4 @@ async def profile(self, is_start: bool = True, profile_prefix: str | None = None
             self.engine.profile,
             is_start,
             profile_prefix,
-        )
\ No newline at end of file
+        )

From 497124bbfb3d5328f696a3161463ca71a05f3425 Mon Sep 17 00:00:00 2001
From: erfgss <97771661+erfgss@users.noreply.github.com>
Date: Fri, 3 Apr 2026 15:35:51 +0800
Subject: [PATCH 09/10] Read fusion checkpoint path from tf_model_config

Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com>
---
 .../diffusion/models/dreamid_omni/pipeline_dreamid_omni.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py b/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py
index f8074fee229..e22765f80eb 100644
--- a/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py
+++ b/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py
@@ -116,7 +116,7 @@ def __init__(
         ## load audio/video model config
         Fusion_model = FusionModel(VIDEO_CONFIG, AUDIO_CONFIG)
 
-        checkpoint_path = self.od_config.model_config.get("fusion", None)
+        checkpoint_path = self.od_config.tf_model_config.get("fusion", None)
         assert checkpoint_path is not None, "fusion checkpoint path is None"
         load_fusion_checkpoint(Fusion_model, checkpoint_path=os.path.join(model, checkpoint_path))
         self.model = Fusion_model

From 14019de0846d9f6ef1d3032586ad6fd1cb38c875 Mon Sep 17 00:00:00 2001
From: erfgss <97771661+erfgss@users.noreply.github.com>
Date: Fri, 3 Apr 2026 15:36:14 +0800
Subject: [PATCH 10/10] Update async_omni_diffusion.py

Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com>
---
 vllm_omni/entrypoints/async_omni_diffusion.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py
index e92eb0bdf56..558ef96cb98 100644
--- a/vllm_omni/entrypoints/async_omni_diffusion.py
+++ b/vllm_omni/entrypoints/async_omni_diffusion.py
@@ -114,7 +114,6 @@ def __init__(
 
                 tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model)
                 od_config.set_tf_model_config(TransformerConfig.from_dict(tf_config_dict))
-                od_config.model_config = tf_config_dict
             else:
                 raise FileNotFoundError("model_index.json not found")
         except (AttributeError, OSError, ValueError, FileNotFoundError):