diff --git a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py index 0dbf402e9e3..2f66d5f7789 100644 --- a/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py +++ b/examples/offline_inference/x_to_video_audio/download_dreamid_omni.py @@ -82,7 +82,6 @@ def main(output_dir: str): data = { "_class_name": "DreamIDOmniPipeline", - "fusion": "DreamID-Omni/dreamid_omni.safetensors", } with open(os.path.join(output_dir, "model_index.json"), "w", encoding="utf-8") as f: @@ -90,6 +89,12 @@ def main(output_dir: str): print(f"model_index.json created at {os.path.join(output_dir, 'model_index.json')}") + transformer_dir = os.path.join(output_dir, "transformer") + os.makedirs(transformer_dir, exist_ok=True) + with open(os.path.join(transformer_dir, "config.json"), "w", encoding="utf-8") as f: + json.dump({"fusion": "DreamID-Omni/dreamid_omni.safetensors"}, f) + print(f"transformer/config.json created at {os.path.join(transformer_dir, 'config.json')}") + # now we download the dependency code download_dependency() diff --git a/examples/offline_inference/x_to_video_audio/x_to_video_audio.md b/examples/offline_inference/x_to_video_audio/x_to_video_audio.md index 59b993a728d..4b5188f41b2 100644 --- a/examples/offline_inference/x_to_video_audio/x_to_video_audio.md +++ b/examples/offline_inference/x_to_video_audio/x_to_video_audio.md @@ -24,7 +24,9 @@ dreamid_omni/ │ ├── models_t5_umt5-xxl-enc-bf16.pth │ ├── Wan2.2_VAE.pth │ -├── model_index.json # create by download_dreamid_omni.py +├── model_index.json # created by download_dreamid_omni.py +└── transformer/ + └── config.json # created by download_dreamid_omni.py ``` ### Run the Inference diff --git a/examples/offline_inference/x_to_video_audio/x_to_video_audio.py b/examples/offline_inference/x_to_video_audio/x_to_video_audio.py index 17d0f06c3c5..e0424add69b 100644 --- a/examples/offline_inference/x_to_video_audio/x_to_video_audio.py +++ 
b/examples/offline_inference/x_to_video_audio/x_to_video_audio.py @@ -132,8 +132,8 @@ def main() -> None: if not outputs: raise RuntimeError("No output returned from DreamID-Omni.") output = outputs[0].request_output - generated_video = output[0].images[0][0] - generated_audio = output[0].images[0][1] + generated_video = output.images[0][0] + generated_audio = output.images[0][1] try: from dreamid_omni.utils.io_utils import save_video except Exception as e: diff --git a/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py b/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py index f8074fee229..e22765f80eb 100644 --- a/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py +++ b/vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py @@ -116,7 +116,7 @@ def __init__( ## load audio/video model config Fusion_model = FusionModel(VIDEO_CONFIG, AUDIO_CONFIG) - checkpoint_path = self.od_config.model_config.get("fusion", None) + checkpoint_path = self.od_config.tf_model_config.get("fusion", None) assert checkpoint_path is not None, "fusion checkpoint path is None" load_fusion_checkpoint(Fusion_model, checkpoint_path=os.path.join(model, checkpoint_path)) self.model = Fusion_model