From da58a2c379aa744919f22096d24a71163661a43f Mon Sep 17 00:00:00 2001 From: Chen Yang <2082464740@qq.com> Date: Thu, 2 Apr 2026 12:43:11 +0800 Subject: [PATCH 01/10] Fix: DreamID-Omni Signed-off-by: Chen Yang <2082464740@qq.com> --- .../x_to_video_audio/download_dreamid_omni.py | 14 ++++++++++++-- .../x_to_video_audio/x_to_video_audio.md | 4 +++- .../x_to_video_audio/x_to_video_audio.py | 4 ++-- vllm_omni/entrypoints/async_omni_diffusion.py | 8 +++++++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py index 0dbf402e9e3..fb97da040d2 100644 --- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py +++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py @@ -7,7 +7,7 @@ import time from pathlib import Path -from huggingface_hub import snapshot_download +from huggingface_hub import snapshot_download,hf_hub_download DEPENDENCY_REPO = "https://github.com/bytedance/DreamID-V.git" DEPENDENCY_BRANCH = "omni" @@ -89,7 +89,17 @@ def main(output_dir: str): json.dump(data, f, indent=2) print(f"model_index.json created at {os.path.join(output_dir, 'model_index.json')}") - + # Download transformer config for Wan2.2-TI2V-5B from HuggingFace + transformer_dir = os.path.join(output_dir, "transformer") + os.makedirs(transformer_dir, exist_ok=True) + + print("Downloading transformer/config.json from Wan-AI/Wan2.2-TI2V-5B-Diffusers...") + transformer_config_path = hf_hub_download( + repo_id="Wan-AI/Wan2.2-TI2V-5B-Diffusers", + filename="transformer/config.json", + local_dir=output_dir, + local_dir_use_symlinks=False, + ) # now we download the dependency code download_dependency() diff --git a/examples/offline_inference/x_to_video_audio/x_to_video_audio.md b/examples/offline_inference/x_to_video_audio/x_to_video_audio.md index 59b993a728d..4b5188f41b2 100644 --- a/examples/offline_inference/x_to_video_audio/x_to_video_audio.md +++ b/examples/offline_inference/x_to_video_audio/x_to_video_audio.md @@ -24,7 +24,9 @@ dreamid_omni/ │ ├── models_t5_umt5-xxl-enc-bf16.pth │ ├── Wan2.2_VAE.pth │ -├── model_index.json # create by download_dreamid_omni.py +├── model_index.json +└── transformer/ + └── config.json # create by download_dreamid_omni.py ``` ### Run the Inference diff --git a/examples/offline_inference/x_to_video_audio/x_to_video_audio.py b/examples/offline_inference/x_to_video_audio/x_to_video_audio.py index 17d0f06c3c5..e0424add69b 100644 --- a/examples/offline_inference/x_to_video_audio/x_to_video_audio.py +++ b/examples/offline_inference/x_to_video_audio/x_to_video_audio.py @@ -132,8 +132,8 @@ def main() -> None: if not outputs: raise RuntimeError("No output returned from DreamID-Omni.") output = outputs[0].request_output - generated_video = output[0].images[0][0] - generated_audio = output[0].images[0][1] + generated_video = output.images[0][0] + generated_audio = output.images[0][1] try: from dreamid_omni.utils.io_utils import save_video except Exception as e: diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py index 674c3509d22..5f0f682f868 100644 --- a/vllm_omni/entrypoints/async_omni_diffusion.py +++ b/vllm_omni/entrypoints/async_omni_diffusion.py @@ -110,6 +110,12 @@ def __init__( if config_dict is not None: if od_config.model_class_name is None: od_config.model_class_name = config_dict.get("_class_name", None) + # Populate model_config with the contents of model_index.json + # This includes model-specific keys like "fusion" for DreamID-Omni + if not od_config.model_config: + # Remove _class_name as it's already stored in model_class_name + model_config = {k: v for k, v in config_dict.items() if k != "_class_name"} + od_config.model_config = model_config od_config.update_multimodal_support() tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model) @@ -470,4 +476,4 @@ async def profile(self, is_start: bool = True, profile_prefix: str | None = None self.engine.profile, is_start, profile_prefix, - ) + ) \ No newline at end of file From 09a2e96f408a018e7829eee41aff37ddba1bb512 Mon Sep 17 00:00:00 2001 From: Chen Yang <2082464740@qq.com> Date: Thu, 2 Apr 2026 12:55:19 +0800 Subject: [PATCH 02/10] Fix pre-commit Signed-off-by: Chen Yang <2082464740@qq.com> --- .../x_to_video_audio/download_dreamid_omni.py | 4 ++-- vllm_omni/entrypoints/async_omni_diffusion.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py index fb97da040d2..453e269894c 100644 --- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py +++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py @@ -7,7 +7,7 @@ import time from pathlib import Path -from huggingface_hub import snapshot_download,hf_hub_download +from huggingface_hub import hf_hub_download, snapshot_download DEPENDENCY_REPO = "https://github.com/bytedance/DreamID-V.git" DEPENDENCY_BRANCH = "omni" @@ -94,7 +94,7 @@ def main(output_dir: str): os.makedirs(transformer_dir, exist_ok=True) print("Downloading transformer/config.json from Wan-AI/Wan2.2-TI2V-5B-Diffusers...") - transformer_config_path = hf_hub_download( + hf_hub_download( repo_id="Wan-AI/Wan2.2-TI2V-5B-Diffusers", filename="transformer/config.json", local_dir=output_dir, diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py index 5f0f682f868..c714b633906 100644 --- a/vllm_omni/entrypoints/async_omni_diffusion.py +++ b/vllm_omni/entrypoints/async_omni_diffusion.py @@ -476,4 +476,4 @@ async def profile(self, is_start: bool = True, profile_prefix: str | None = None self.engine.profile, is_start, profile_prefix, - ) \ No newline at end of file + ) From ea9403bc026dc84db854da3f97ee03df5b34ed08 Mon Sep 17 00:00:00 2001 From: Chen Yang <2082464740@qq.com> Date: Thu, 2 Apr 2026 19:08:04 +0800 Subject: [PATCH 03/10] Fix bug Signed-off-by: Chen Yang <2082464740@qq.com> --- .../x_to_video_audio/download_dreamid_omni.py | 16 ++++++---------- vllm_omni/entrypoints/async_omni_diffusion.py | 10 ++++------ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py index 453e269894c..4cd076c0687 100644 --- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py +++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py @@ -7,7 +7,7 @@ import time from pathlib import Path -from huggingface_hub import hf_hub_download, snapshot_download +from huggingface_hub import snapshot_download DEPENDENCY_REPO = "https://github.com/bytedance/DreamID-V.git" DEPENDENCY_BRANCH = "omni" @@ -89,17 +89,13 @@ def main(output_dir: str): json.dump(data, f, indent=2) print(f"model_index.json created at {os.path.join(output_dir, 'model_index.json')}") - # Download transformer config for Wan2.2-TI2V-5B from HuggingFace + transformer_dir = os.path.join(output_dir, "transformer") os.makedirs(transformer_dir, exist_ok=True) + with open(os.path.join(transformer_dir, "config.json"), "w", encoding="utf-8") as f: + json.dump({}, f) + print(f"transformer/config.json created at {os.path.join(transformer_dir, 'config.json')}") - print("Downloading transformer/config.json from Wan-AI/Wan2.2-TI2V-5B-Diffusers...") - hf_hub_download( - repo_id="Wan-AI/Wan2.2-TI2V-5B-Diffusers", - filename="transformer/config.json", - local_dir=output_dir, - local_dir_use_symlinks=False, - ) # now we download the dependency code download_dependency() @@ -110,4 +106,4 @@ def main(output_dir: str): "--output-dir", type=str, default="./dreamid_omni", help="Base directory to save downloaded models" ) args = parser.parse_args() - main(args.output_dir) + main(args.output_dir) \ No newline at end of file diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py index c714b633906..e394f020e23 100644 --- a/vllm_omni/entrypoints/async_omni_diffusion.py +++ b/vllm_omni/entrypoints/async_omni_diffusion.py @@ -110,16 +110,14 @@ def __init__( if config_dict is not None: if od_config.model_class_name is None: od_config.model_class_name = config_dict.get("_class_name", None) - # Populate model_config with the contents of model_index.json - # This includes model-specific keys like "fusion" for DreamID-Omni - if not od_config.model_config: - # Remove _class_name as it's already stored in model_class_name + + if od_config.model_class_name == "DreamIDOmniPipeline": model_config = {k: v for k, v in config_dict.items() if k != "_class_name"} od_config.model_config = model_config od_config.update_multimodal_support() tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model) - od_config.tf_model_config = TransformerConfig.from_dict(tf_config_dict) + od_config.set_tf_model_config(TransformerConfig.from_dict(tf_config_dict)) else: raise FileNotFoundError("model_index.json not found") except (AttributeError, OSError, ValueError, FileNotFoundError): @@ -127,7 +125,7 @@ def __init__( if cfg is None: raise ValueError(f"Could not find config.json or model_index.json for model {od_config.model}") - od_config.tf_model_config = TransformerConfig.from_dict(cfg) + od_config.set_tf_model_config(TransformerConfig.from_dict(cfg)) model_type = cfg.get("model_type") architectures = cfg.get("architectures") or [] # Bagel/NextStep models don't have a model_index.json, so we set the pipeline class name manually From 3ee65b1206c7e949ac539ca5d32a9773a092d8a3 Mon Sep 17 00:00:00 2001 From: Chen Yang <2082464740@qq.com> Date: Thu, 2 Apr 2026 19:29:23 +0800 Subject: [PATCH 04/10] Fix bug Signed-off-by: Chen Yang <2082464740@qq.com> --- vllm_omni/entrypoints/async_omni_diffusion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py index e394f020e23..26d4ddcb84c 100644 --- a/vllm_omni/entrypoints/async_omni_diffusion.py +++ b/vllm_omni/entrypoints/async_omni_diffusion.py @@ -117,7 +117,7 @@ def __init__( od_config.update_multimodal_support() tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model) - od_config.set_tf_model_config(TransformerConfig.from_dict(tf_config_dict)) + od_config.tf_model_config = TransformerConfig.from_dict(tf_config_dict) else: raise FileNotFoundError("model_index.json not found") except (AttributeError, OSError, ValueError, FileNotFoundError): @@ -125,7 +125,7 @@ def __init__( if cfg is None: raise ValueError(f"Could not find config.json or model_index.json for model {od_config.model}") - od_config.set_tf_model_config(TransformerConfig.from_dict(cfg)) + od_config.tf_model_config = TransformerConfig.from_dict(cfg) model_type = cfg.get("model_type") architectures = cfg.get("architectures") or [] # Bagel/NextStep models don't have a model_index.json, so we set the pipeline class name manually From 84750912c27845355d5907dc1e78f34bb716a453 Mon Sep 17 00:00:00 2001 From: Chen Yang <2082464740@qq.com> Date: Fri, 3 Apr 2026 08:28:37 +0800 Subject: [PATCH 05/10] Fix bug Signed-off-by: Chen Yang <2082464740@qq.com> --- .../x_to_video_audio/download_dreamid_omni.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py index 4cd076c0687..5a106688814 100644 --- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py +++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py @@ -89,7 +89,7 @@ def main(output_dir: str): json.dump(data, f, indent=2) print(f"model_index.json created at {os.path.join(output_dir, 'model_index.json')}") - + transformer_dir = os.path.join(output_dir, "transformer") os.makedirs(transformer_dir, exist_ok=True) with open(os.path.join(transformer_dir, "config.json"), "w", encoding="utf-8") as f: @@ -106,4 +106,4 @@ def main(output_dir: str): "--output-dir", type=str, default="./dreamid_omni", help="Base directory to save downloaded models" ) args = parser.parse_args() - main(args.output_dir) \ No newline at end of file + main(args.output_dir) From 78e9f49a4462a49580395846311927012a9f33f6 Mon Sep 17 00:00:00 2001 From: erfgss <97771661+erfgss@users.noreply.github.com> Date: Fri, 3 Apr 2026 09:30:43 +0800 Subject: [PATCH 06/10] Refactor model configuration assignment logic Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com> --- vllm_omni/entrypoints/async_omni_diffusion.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py index 26d4ddcb84c..7df7cc415ba 100644 --- a/vllm_omni/entrypoints/async_omni_diffusion.py +++ b/vllm_omni/entrypoints/async_omni_diffusion.py @@ -110,10 +110,7 @@ def __init__( if config_dict is not None: if od_config.model_class_name is None: od_config.model_class_name = config_dict.get("_class_name", None) - - if od_config.model_class_name == "DreamIDOmniPipeline": - model_config = {k: v for k, v in config_dict.items() if k != "_class_name"} - od_config.model_config = model_config + od_config.model_config = {k: v for k, v in config_dict.items() if k != "_class_name"} od_config.update_multimodal_support() tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model) From 97599d242beb7939b0c8d82225811e706ed870ae Mon Sep 17 00:00:00 2001 From: Chen Yang <2082464740@qq.com> Date: Fri, 3 Apr 2026 15:01:46 +0800 Subject: [PATCH 07/10] refine Signed-off-by: Chen Yang <2082464740@qq.com> --- .../x_to_video_audio/download_dreamid_omni.py | 3 +-- vllm_omni/entrypoints/async_omni_diffusion.py | 11 ++++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py index 5a106688814..2f66d5f7789 100644 --- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py +++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py @@ -82,7 +82,6 @@ def main(output_dir: str): data = { "_class_name": "DreamIDOmniPipeline", - "fusion": "DreamID-Omni/dreamid_omni.safetensors", } with open(os.path.join(output_dir, "model_index.json"), "w", encoding="utf-8") as f: @@ -93,7 +92,7 @@ def main(output_dir: str): transformer_dir = os.path.join(output_dir, "transformer") os.makedirs(transformer_dir, exist_ok=True) with open(os.path.join(transformer_dir, "config.json"), "w", encoding="utf-8") as f: - json.dump({}, f) + json.dump({"fusion": "DreamID-Omni/dreamid_omni.safetensors"}, f) print(f"transformer/config.json created at {os.path.join(transformer_dir, 'config.json')}") # now we download the dependency code diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py index 26d4ddcb84c..a0c9fbfc5a6 100644 --- a/vllm_omni/entrypoints/async_omni_diffusion.py +++ b/vllm_omni/entrypoints/async_omni_diffusion.py @@ -110,14 +110,11 @@ def __init__( if config_dict is not None: if od_config.model_class_name is None: od_config.model_class_name = config_dict.get("_class_name", None) - - if od_config.model_class_name == "DreamIDOmniPipeline": - model_config = {k: v for k, v in config_dict.items() if k != "_class_name"} - od_config.model_config = model_config od_config.update_multimodal_support() tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model) - od_config.tf_model_config = TransformerConfig.from_dict(tf_config_dict) + od_config.set_tf_model_config(TransformerConfig.from_dict(tf_config_dict)) + od_config.model_config = tf_config_dict else: raise FileNotFoundError("model_index.json not found") except (AttributeError, OSError, ValueError, FileNotFoundError): @@ -125,7 +122,7 @@ def __init__( if cfg is None: raise ValueError(f"Could not find config.json or model_index.json for model {od_config.model}") - od_config.tf_model_config = TransformerConfig.from_dict(cfg) + od_config.set_tf_model_config(TransformerConfig.from_dict(cfg)) model_type = cfg.get("model_type") architectures = cfg.get("architectures") or [] # Bagel/NextStep models don't have a model_index.json, so we set the pipeline class name manually @@ -474,4 +471,4 @@ async def profile(self, is_start: bool = True, profile_prefix: str | None = None self.engine.profile, is_start, profile_prefix, - ) + ) \ No newline at end of file From d1fcc5422595fd3f642d1069b00f9640dd1f9287 Mon Sep 17 00:00:00 2001 From: erfgss <97771661+erfgss@users.noreply.github.com> Date: Fri, 3 Apr 2026 15:11:38 +0800 Subject: [PATCH 08/10] Update async_omni_diffusion.py Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com> --- vllm_omni/entrypoints/async_omni_diffusion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py index bad90698fd1..e92eb0bdf56 100644 --- a/vllm_omni/entrypoints/async_omni_diffusion.py +++ b/vllm_omni/entrypoints/async_omni_diffusion.py @@ -110,7 +110,6 @@ def __init__( if config_dict is not None: if od_config.model_class_name is None: od_config.model_class_name = config_dict.get("_class_name", None) - od_config.model_config = {k: v for k, v in config_dict.items() if k != "_class_name"} od_config.update_multimodal_support() tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model) @@ -472,4 +471,4 @@ async def profile(self, is_start: bool = True, profile_prefix: str | None = None self.engine.profile, is_start, profile_prefix, - ) \ No newline at end of file + ) From 497124bbfb3d5328f696a3161463ca71a05f3425 Mon Sep 17 00:00:00 2001 From: erfgss <97771661+erfgss@users.noreply.github.com> Date: Fri, 3 Apr 2026 15:35:51 +0800 Subject: [PATCH 09/10] Update pipeline_dreamid_omni.py Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com> --- .../diffusion/models/dreamid_omni/pipeline_dreamid_omni.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py b/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py index f8074fee229..e22765f80eb 100644 --- a/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py +++ b/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py @@ -116,7 +116,7 @@ def __init__( ## load audio/video model config Fusion_model = FusionModel(VIDEO_CONFIG, AUDIO_CONFIG) - checkpoint_path = self.od_config.model_config.get("fusion", None) + checkpoint_path = self.od_config.tf_model_config.get("fusion", None) assert checkpoint_path is not None, "fusion checkpoint path is None" load_fusion_checkpoint(Fusion_model, checkpoint_path=os.path.join(model, checkpoint_path)) self.model = Fusion_model From 14019de0846d9f6ef1d3032586ad6fd1cb38c875 Mon Sep 17 00:00:00 2001 From: erfgss <97771661+erfgss@users.noreply.github.com> Date: Fri, 3 Apr 2026 15:36:14 +0800 Subject: [PATCH 10/10] Update async_omni_diffusion.py Signed-off-by: erfgss <97771661+erfgss@users.noreply.github.com> --- vllm_omni/entrypoints/async_omni_diffusion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm_omni/entrypoints/async_omni_diffusion.py b/vllm_omni/entrypoints/async_omni_diffusion.py index e92eb0bdf56..558ef96cb98 100644 --- a/vllm_omni/entrypoints/async_omni_diffusion.py +++ b/vllm_omni/entrypoints/async_omni_diffusion.py @@ -114,7 +114,6 @@ def __init__( tf_config_dict = get_hf_file_to_dict("transformer/config.json", od_config.model) od_config.set_tf_model_config(TransformerConfig.from_dict(tf_config_dict)) - od_config.model_config = tf_config_dict else: raise FileNotFoundError("model_index.json not found") except (AttributeError, OSError, ValueError, FileNotFoundError):