From 89ffdaddccc89d03f05e258cfbc35401b12c44a7 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 26 Jan 2026 16:23:38 +0800 Subject: [PATCH 01/17] Create README.md Signed-off-by: zhou zhuoxin --- .../offline_inference/text_to_audio/README.md | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 examples/offline_inference/text_to_audio/README.md diff --git a/examples/offline_inference/text_to_audio/README.md b/examples/offline_inference/text_to_audio/README.md new file mode 100644 index 00000000000..45bc20ee7ea --- /dev/null +++ b/examples/offline_inference/text_to_audio/README.md @@ -0,0 +1,38 @@ +# Text-To-Audio + +The `stabilityai/stable-audio-open-1.0` pipeline generates audio from text prompts. + +## Prerequisites + +If you use a gated model (e.g., `stabilityai/stable-audio-open-1.0`), ensure you have access: + +1. **Accept Model License**: Visit the model page on Hugging Face (e.g., [stabilityai/stable-audio-open-1.0](https://huggingface.co/stabilityai/stable-audio-open-1.0)) and accept the user agreement. +2. **Authenticate**: Log in to Hugging Face locally to access the gated model. + ```bash + huggingface-cli login + ``` + +## Local CLI Usage + +```bash +python text_to_audio.py \ + --model stabilityai/stable-audio-open-1.0 \ + --prompt "The sound of a hammer hitting a wooden surface" \ + --negative_prompt "Low quality" \ + --seed 42 \ + --guidance_scale 7.0 \ + --audio_length 10.0 \ + --num_inference_steps 100 \ + --output stable_audio_output.wav +``` + +Key arguments: + +- `--prompt`: text description (string). +- `--negative_prompt`: negative prompt for classifier-free guidance. +- `--seed`: integer seed for deterministic generation. +- `--guidance_scale`: classifier-free guidance scale. +- `--audio_length`: audio duration in seconds. +- `--num_inference_steps`: diffusion sampling steps (more steps = higher quality, slower). +- `--output`: path to save the generated WAV file. 
+ From 0274127c375c86999a32bb63e430971518759571 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 26 Jan 2026 16:44:54 +0800 Subject: [PATCH 02/17] Update README.md Signed-off-by: zhou zhuoxin --- examples/offline_inference/text_to_audio/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/offline_inference/text_to_audio/README.md b/examples/offline_inference/text_to_audio/README.md index 45bc20ee7ea..8ec1eafe52d 100644 --- a/examples/offline_inference/text_to_audio/README.md +++ b/examples/offline_inference/text_to_audio/README.md @@ -35,4 +35,3 @@ Key arguments: - `--audio_length`: audio duration in seconds. - `--num_inference_steps`: diffusion sampling steps (more steps = higher quality, slower). - `--output`: path to save the generated WAV file. - From 942b9bae1c13d75676f8ef3b123f26bc790814e8 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 26 Jan 2026 16:45:29 +0800 Subject: [PATCH 03/17] Fix formatting in README for output option Signed-off-by: zhou zhuoxin From 813dd18188045d3a219d8d73029046418a82b106 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 01:09:21 +0800 Subject: [PATCH 04/17] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index 97357dc3b33..b8e27b12222 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -42,6 +42,9 @@ resolve_model_config_path, ) from vllm_omni.inputs.data import OmniDiffusionSamplingParams, OmniPromptType, OmniSamplingParams +from vllm_omni.model_executor.model_loader.weight_utils import ( + download_weights_from_hf_specific, +) from vllm_omni.outputs import OmniRequestOutput logger = init_logger(__name__) @@ -74,8 +77,23 @@ def omni_snapshot_download(model_id) -> str: from modelscope.hub.snapshot_download import snapshot_download return snapshot_download(model_id) - else: - 
return _dummy_snapshot_download(model_id) + + # If it's already a local path, just return it + if os.path.exists(model_id): + return model_id + + # For other cases (Hugging Face), perform a real download to ensure all + # necessary files (including *.pt for audio/diffusion) are available locally + # before stage workers are spawned. This prevents initialization timeouts. + return download_weights_from_hf_specific( + model_id, + None, + allow_patterns=[ + "*.json", "*.bin", "*.safetensors", "*.pt", "*.txt", "*.model", + "*.yaml" + ], + require_all=True, + ) class OmniBase: From da08cf94a77317f81a11b626bc373de911460424 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 01:12:38 +0800 Subject: [PATCH 05/17] Update weight_utils.py Signed-off-by: zhou zhuoxin --- vllm_omni/model_executor/model_loader/weight_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index 7432ad9a2a4..35fee67616e 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -20,6 +20,7 @@ def download_weights_from_hf_specific( allow_patterns: list[str], revision: str | None = None, ignore_patterns: str | list[str] | None = None, + require_all: bool = False, ) -> str: """Download model weights from Hugging Face Hub. Users can specify the allow_patterns to download only the necessary weights. @@ -35,6 +36,8 @@ def download_weights_from_hf_specific( ignore_patterns (Optional[Union[str, list[str]]]): The patterns to filter out the weight files. Files matched by any of the patterns will be ignored. + require_all (bool): If True, will download all patterns instead of + returning after the first one that contains files. Returns: str: The path to the downloaded model weights. 
@@ -59,8 +62,8 @@ def download_weights_from_hf_specific( **download_kwargs, ) # If we have downloaded weights for this allow_pattern, - # we don't need to check the rest. - if any(Path(hf_folder).glob(allow_pattern)): + # we don't need to check the rest,unless require_all is set. + if not require_all and any(Path(hf_folder).glob(allow_pattern)): break time_taken = time.perf_counter() - start_time if time_taken > 0.5: From 9f98029e72545d0109ee8a819c6d1b5ba3e62315 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 01:36:57 +0800 Subject: [PATCH 06/17] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index b8e27b12222..6c61a7bda6c 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -71,6 +71,10 @@ def _dummy_snapshot_download(model_id): def omni_snapshot_download(model_id) -> str: + # If it's already a local path, just return it + if os.path.exists(model_id): + return model_id + # TODO: this is just a workaround for quickly use modelscope, we should support # modelscope in weight loading feature instead of using `snapshot_download` if os.environ.get("VLLM_USE_MODELSCOPE", False): @@ -78,19 +82,15 @@ def omni_snapshot_download(model_id) -> str: return snapshot_download(model_id) - # If it's already a local path, just return it - if os.path.exists(model_id): - return model_id - # For other cases (Hugging Face), perform a real download to ensure all # necessary files (including *.pt for audio/diffusion) are available locally # before stage workers are spawned. This prevents initialization timeouts. 
return download_weights_from_hf_specific( - model_id, - None, + model_name_or_path=model_id, + cache_dir=None, allow_patterns=[ - "*.json", "*.bin", "*.safetensors", "*.pt", "*.txt", "*.model", - "*.yaml" + "**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt", + "**/*.txt", "**/*.model", "**/*.yaml" ], require_all=True, ) From e1edaa816817bcfb6dde311106d933b8b0f5d196 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 01:39:14 +0800 Subject: [PATCH 07/17] Update weight_utils.py Signed-off-by: zhou zhuoxin --- vllm_omni/model_executor/model_loader/weight_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index 35fee67616e..b38bbc974a6 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -36,8 +36,7 @@ def download_weights_from_hf_specific( ignore_patterns (Optional[Union[str, list[str]]]): The patterns to filter out the weight files. Files matched by any of the patterns will be ignored. - require_all (bool): If True, will download all patterns instead of - returning after the first one that contains files. + require_all (bool): If True, will download all patterns. Returns: str: The path to the downloaded model weights. 
From fadbfe224f889578e715f49d9986bd1c69a6407e Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 12:37:14 +0800 Subject: [PATCH 08/17] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index 6c61a7bda6c..c96158ccf5e 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -74,24 +74,19 @@ def omni_snapshot_download(model_id) -> str: # If it's already a local path, just return it if os.path.exists(model_id): return model_id - # TODO: this is just a workaround for quickly use modelscope, we should support # modelscope in weight loading feature instead of using `snapshot_download` if os.environ.get("VLLM_USE_MODELSCOPE", False): from modelscope.hub.snapshot_download import snapshot_download return snapshot_download(model_id) - # For other cases (Hugging Face), perform a real download to ensure all # necessary files (including *.pt for audio/diffusion) are available locally # before stage workers are spawned. This prevents initialization timeouts. 
return download_weights_from_hf_specific( model_name_or_path=model_id, cache_dir=None, - allow_patterns=[ - "**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt", - "**/*.txt", "**/*.model", "**/*.yaml" - ], + allow_patterns=["**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt","**/*.txt", "**/*.model", "**/*.yaml"], require_all=True, ) From f9b147e87ac06d137c9b3ac5f452d36a7505c136 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 12:40:24 +0800 Subject: [PATCH 09/17] Fix formatting of allow_patterns list in omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index c96158ccf5e..eec6537a2af 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -86,7 +86,7 @@ def omni_snapshot_download(model_id) -> str: return download_weights_from_hf_specific( model_name_or_path=model_id, cache_dir=None, - allow_patterns=["**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt","**/*.txt", "**/*.model", "**/*.yaml"], + allow_patterns=["**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt", "**/*.txt", "**/*.model", "**/*.yaml"], require_all=True, ) From 4589643a559aa6fb461dfeb9566a6843c6d9fe68 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 14:26:35 +0800 Subject: [PATCH 10/17] Update vllm_omni/model_executor/model_loader/weight_utils.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: zhou zhuoxin --- vllm_omni/model_executor/model_loader/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index b38bbc974a6..7347ee20a1c 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -61,7 +61,7 @@ def download_weights_from_hf_specific( 
**download_kwargs, ) # If we have downloaded weights for this allow_pattern, - # we don't need to check the rest,unless require_all is set. + # we don't need to check the rest, unless require_all is set. if not require_all and any(Path(hf_folder).glob(allow_pattern)): break time_taken = time.perf_counter() - start_time From b4ac9800583ae1e9f148497d97a29ba46044cc4c Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Thu, 5 Feb 2026 14:26:59 +0800 Subject: [PATCH 11/17] Update vllm_omni/model_executor/model_loader/weight_utils.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: zhou zhuoxin --- vllm_omni/model_executor/model_loader/weight_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index 7347ee20a1c..d147269d666 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -36,7 +36,9 @@ def download_weights_from_hf_specific( ignore_patterns (Optional[Union[str, list[str]]]): The patterns to filter out the weight files. Files matched by any of the patterns will be ignored. - require_all (bool): If True, will download all patterns. + require_all (bool): If True, will iterate through and download files + matching all patterns in allow_patterns. If False, will stop after + the first pattern that matches any files. Returns: str: The path to the downloaded model weights. 
From 912687d72198bb0e4b70100c9c60115eb163a238 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 9 Feb 2026 14:30:42 +0800 Subject: [PATCH 12/17] Update weight_utils.py Signed-off-by: zhou zhuoxin --- .../model_loader/weight_utils.py | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/vllm_omni/model_executor/model_loader/weight_utils.py b/vllm_omni/model_executor/model_loader/weight_utils.py index d147269d666..c5225ec0287 100644 --- a/vllm_omni/model_executor/model_loader/weight_utils.py +++ b/vllm_omni/model_executor/model_loader/weight_utils.py @@ -52,20 +52,31 @@ def download_weights_from_hf_specific( # downloading the same model weights at the same time. with get_lock(model_name_or_path, cache_dir): start_time = time.perf_counter() - for allow_pattern in allow_patterns: + if require_all: hf_folder = snapshot_download( model_name_or_path, - allow_patterns=allow_pattern, + allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, cache_dir=cache_dir, revision=revision, local_files_only=local_only, **download_kwargs, ) - # If we have downloaded weights for this allow_pattern, - # we don't need to check the rest, unless require_all is set. - if not require_all and any(Path(hf_folder).glob(allow_pattern)): - break + else: + for allow_pattern in allow_patterns: + hf_folder = snapshot_download( + model_name_or_path, + allow_patterns=allow_pattern, + ignore_patterns=ignore_patterns, + cache_dir=cache_dir, + revision=revision, + local_files_only=local_only, + **download_kwargs, + ) + # If we have downloaded weights for this allow_pattern, + # we don't need to check the rest, unless require_all is set. 
+ if any(Path(hf_folder).glob(allow_pattern)): + break time_taken = time.perf_counter() - start_time if time_taken > 0.5: logger.info( From 7a582d1fe526d12bfe673bba1904d2a61d3920b8 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 9 Feb 2026 15:47:40 +0800 Subject: [PATCH 13/17] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index eec6537a2af..6025d82b46c 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -86,7 +86,7 @@ def omni_snapshot_download(model_id) -> str: return download_weights_from_hf_specific( model_name_or_path=model_id, cache_dir=None, - allow_patterns=["**/*.json", "**/*.bin", "**/*.safetensors", "**/*.pt", "**/*.txt", "**/*.model", "**/*.yaml"], + allow_patterns=["*"], require_all=True, ) From 41e6c7d19200a625e04d4651accb4f2c0c0c9691 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 9 Feb 2026 17:58:45 +0800 Subject: [PATCH 14/17] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index 6025d82b46c..cf543718397 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import huggingface_hub import json import multiprocessing as mp import os @@ -83,12 +84,16 @@ def omni_snapshot_download(model_id) -> str: # For other cases (Hugging Face), perform a real download to ensure all # necessary files (including *.pt for audio/diffusion) are available locally # before stage workers are spawned. This prevents initialization timeouts. 
- return download_weights_from_hf_specific( - model_name_or_path=model_id, - cache_dir=None, - allow_patterns=["*"], - require_all=True, - ) + try: + return download_weights_from_hf_specific( + model_name_or_path=model_id, + cache_dir=None, + allow_patterns=["*"], + require_all=True, + ) + except huggingface_hub.errors.RepositoryNotFoundError: + logger.warning(f"Repository not found for '{model_id}'.") + return model_id class OmniBase: From e7986cb16a47e26a077f39ced6e4407aa9e9479b Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 9 Feb 2026 18:14:33 +0800 Subject: [PATCH 15/17] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index cf543718397..9f0663f0a95 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import huggingface_hub import json import multiprocessing as mp import os @@ -12,7 +11,7 @@ from dataclasses import asdict from pprint import pformat from typing import Any, Literal, overload - +import huggingface_hub from omegaconf import OmegaConf from tqdm.auto import tqdm from vllm import SamplingParams From 90eca067e5404389bd8d6a1be4e79f35d52f6ae9 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 9 Feb 2026 18:26:16 +0800 Subject: [PATCH 16/17] Add import for huggingface_hub in omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index 9f0663f0a95..449e5532d87 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -11,6 +11,7 @@ from dataclasses import asdict from pprint import pformat from typing import Any, Literal, overload + import huggingface_hub from omegaconf import OmegaConf from 
tqdm.auto import tqdm From 9b3c66c34deeeb3aafb7b1d03896b03d90ddefa1 Mon Sep 17 00:00:00 2001 From: zhou zhuoxin Date: Mon, 9 Feb 2026 19:28:52 +0800 Subject: [PATCH 17/17] Update omni.py Signed-off-by: zhou zhuoxin --- vllm_omni/entrypoints/omni.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_omni/entrypoints/omni.py b/vllm_omni/entrypoints/omni.py index 42fcab4fd66..510813c6abd 100644 --- a/vllm_omni/entrypoints/omni.py +++ b/vllm_omni/entrypoints/omni.py @@ -40,10 +40,10 @@ resolve_model_config_path, ) from vllm_omni.inputs.data import OmniDiffusionSamplingParams, OmniPromptType, OmniSamplingParams +from vllm_omni.metrics import OrchestratorAggregator, StageRequestStats from vllm_omni.model_executor.model_loader.weight_utils import ( download_weights_from_hf_specific, ) -from vllm_omni.metrics import OrchestratorAggregator, StageRequestStats from vllm_omni.outputs import OmniRequestOutput logger = init_logger(__name__)