Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion benchmarks/diffusion/quantization_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
import numpy as np
import torch

from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides


def compute_lpips_images(
baseline_images: list,
Expand Down Expand Up @@ -137,6 +139,7 @@ def _build_omni_kwargs(args, quantization=None):
)
kwargs = {
"model": args.model,
"explicit_overrides": getattr(args, "_explicit_overrides", None),
"parallel_config": parallel_config,
"enforce_eager": args.enforce_eager,
}
Expand Down Expand Up @@ -452,7 +455,7 @@ def parse_args():
parser.add_argument("--ring-degree", type=int, default=1)
parser.add_argument("--tensor-parallel-size", type=int, default=1)
parser.add_argument("--enforce-eager", action="store_true")
return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion examples/offline_inference/bagel/end2end.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import os

from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.inputs.data import OmniPromptType
from vllm_omni.model_executor.stage_input_processors.bagel import (
GEN_THINK_SYSTEM_PROMPT,
Expand Down Expand Up @@ -98,7 +99,7 @@ def parse_args():
help="Enable thinking mode: AR stage decodes <think>...</think> planning tokens before image generation.",
)

args = parser.parse_args()
args = parse_args_with_explicit_overrides(parser)
return args


Expand Down Expand Up @@ -152,6 +153,7 @@ def main():
)
if args.quantization:
omni_kwargs["quantization_config"] = args.quantization
omni_kwargs["explicit_overrides"] = getattr(args, "_explicit_overrides", None)

omni = Omni(model=model_name, **omni_kwargs)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from vllm import SamplingParams
from vllm.assets.audio import AudioAsset

from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni
from vllm_omni.model_executor.models.cosyvoice3.config import CosyVoice3Config
from vllm_omni.model_executor.models.cosyvoice3.tokenizer import get_qwen_tokenizer
Expand Down Expand Up @@ -55,7 +56,7 @@ def run_e2e():
required=True,
help="Path to tokenizer directory (e.g., <model_path>/CosyVoice-BlankEN).",
)
args = parser.parse_args()
args = parse_args_with_explicit_overrides(parser)
_ensure_mel_filters_asset()
# Ensure tokenizer directory exists
if not os.path.exists(args.tokenizer):
Expand All @@ -72,6 +73,7 @@ def run_e2e():
# We pass trust_remote_code=True same as Qwen examples
omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
stage_configs_path=args.stage_config,
trust_remote_code=True,
tokenizer=args.tokenizer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from PIL import Image

from vllm_omni.diffusion.data import DiffusionParallelConfig
from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni
from vllm_omni.inputs.data import OmniDiffusionSamplingParams
from vllm_omni.outputs import OmniRequestOutput
Expand Down Expand Up @@ -100,7 +101,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--vae-use-tiling", action="store_true")
parser.add_argument("--enable-cpu-offload", action="store_true")

return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


# ===========================
Expand Down Expand Up @@ -149,6 +150,7 @@ async def main():
# ---- Initialize Omni ----
omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
vae_use_slicing=args.vae_use_slicing,
vae_use_tiling=args.vae_use_tiling,
cache_backend=args.cache_backend,
Expand Down
11 changes: 9 additions & 2 deletions examples/offline_inference/dynin_omni/end2end.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import torch
from PIL import Image

from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides

TASK_CHOICES = ("t2t", "t2i", "t2s", "i2i", "i2t", "s2t", "v2t")

TASK_DEFAULT_RUNTIME = {
Expand Down Expand Up @@ -970,7 +972,7 @@ def parse_args(repo_root: Path) -> argparse.Namespace:
parser.add_argument("--vq-model-audio-local-files-only", action=argparse.BooleanOptionalAction, default=None)

parser.add_argument("--disable-hf-xet", action=argparse.BooleanOptionalAction, default=True)
return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


def main() -> None:
Expand Down Expand Up @@ -1395,7 +1397,12 @@ def main() -> None:
from vllm_omni.entrypoints.omni import Omni

stage_config_path = str(Path(args.stage_config_path).expanduser())
omni = Omni(model=model_source, stage_configs_path=stage_config_path, dtype=args.dtype)
omni = Omni(
model=model_source,
explicit_overrides=getattr(args, "_explicit_overrides", None),
stage_configs_path=stage_config_path,
dtype=args.dtype,
)
sampling_params_list = [
SamplingParams(max_tokens=int(args.max_tokens_per_stage), temperature=0.0, top_p=1.0, detokenize=False)
for _ in range(omni.num_stages)
Expand Down
5 changes: 4 additions & 1 deletion examples/offline_inference/fish_speech/end2end.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from vllm.utils.argparse_utils import FlexibleArgumentParser

from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni import AsyncOmni, Omni
from vllm_omni.model_executor.models.fish_speech.dac_utils import DAC_HOP_LENGTH, DAC_SAMPLE_RATE
from vllm_omni.model_executor.models.fish_speech.prompt_utils import (
Expand Down Expand Up @@ -149,6 +150,7 @@ def main(args):

omni = Omni(
model=model_name,
explicit_overrides=getattr(args, "_explicit_overrides", None),
stage_configs_path=stage_configs_path,
log_stats=args.log_stats,
stage_init_timeout=args.stage_init_timeout,
Expand Down Expand Up @@ -185,6 +187,7 @@ async def main_streaming(args):

omni = AsyncOmni(
model=model_name,
explicit_overrides=getattr(args, "_explicit_overrides", None),
stage_configs_path=stage_configs_path,
log_stats=args.log_stats,
stage_init_timeout=args.stage_init_timeout,
Expand Down Expand Up @@ -273,7 +276,7 @@ def parse_args():
default=False,
help="Stream audio chunks as they arrive via AsyncOmni.",
)
return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion examples/offline_inference/glm_image/end2end.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

from PIL import Image

from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni
from vllm_omni.inputs.data import OmniDiffusionSamplingParams

Expand Down Expand Up @@ -260,6 +261,7 @@ def main(args: argparse.Namespace) -> None:

omni = Omni(
model=args.model_path,
explicit_overrides=getattr(args, "_explicit_overrides", None),
stage_configs_path=config_path,
log_stats=args.enable_stats,
stage_init_timeout=args.stage_init_timeout,
Expand Down Expand Up @@ -503,7 +505,7 @@ def parse_args() -> argparse.Namespace:
help="Enable diffusion pipeline profiler to display stage durations.",
)

return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion examples/offline_inference/helios/end2end.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import torch

from vllm_omni.diffusion.data import DiffusionParallelConfig
from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni
from vllm_omni.inputs.data import OmniDiffusionSamplingParams
from vllm_omni.outputs import OmniRequestOutput
Expand Down Expand Up @@ -196,7 +197,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--cfg-parallel-size", type=int, default=1, choices=[1, 2], help="CFG parallel size.")
parser.add_argument("--tensor-parallel-size", type=int, default=1, help="Tensor parallelism size.")

return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


def main():
Expand All @@ -212,6 +213,7 @@ def main():

omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
enable_layerwise_offload=args.enable_layerwise_offload,
vae_use_slicing=args.vae_use_slicing,
vae_use_tiling=args.vae_use_tiling,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from PIL import Image

from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni

"""
Expand Down Expand Up @@ -46,7 +47,7 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Enable diffusion pipeline profiler to display stage durations.",
)
return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


def load_image(image_path: str) -> Image.Image:
Expand All @@ -59,6 +60,7 @@ def load_image(image_path: str) -> Image.Image:
def main(args: argparse.Namespace) -> None:
omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
enable_diffusion_pipeline_profiler=args.enable_diffusion_pipeline_profiler,
mode="image-to-text",
)
Expand Down
4 changes: 3 additions & 1 deletion examples/offline_inference/image_to_image/image_edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
from PIL import Image

from vllm_omni.diffusion.data import DiffusionParallelConfig
from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni
from vllm_omni.inputs.data import OmniDiffusionSamplingParams
from vllm_omni.outputs import OmniRequestOutput
Expand Down Expand Up @@ -330,7 +331,7 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Enable diffusion pipeline profiler to display stage durations.",
)
return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


def main():
Expand Down Expand Up @@ -386,6 +387,7 @@ def main():
# Initialize Omni with appropriate pipeline
omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
enable_layerwise_offload=args.enable_layerwise_offload,
vae_use_slicing=args.vae_use_slicing,
vae_use_tiling=args.vae_use_tiling,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import torch

from vllm_omni.diffusion.data import DiffusionParallelConfig
from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni
from vllm_omni.inputs.data import OmniDiffusionSamplingParams
from vllm_omni.outputs import OmniRequestOutput
Expand Down Expand Up @@ -187,7 +188,7 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Enable diffusion pipeline profiler to display stage durations.",
)
return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


def calculate_dimensions(
Expand Down Expand Up @@ -281,6 +282,7 @@ def main():
)
omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
enable_layerwise_offload=args.enable_layerwise_offload,
vae_use_slicing=args.vae_use_slicing,
vae_use_tiling=args.vae_use_tiling,
Expand Down
4 changes: 3 additions & 1 deletion examples/offline_inference/magi_human/end2end.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse

from vllm_omni.diffusion.utils.media_utils import mux_video_audio_bytes
from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni
from vllm_omni.inputs.data import OmniDiffusionSamplingParams

Expand All @@ -24,7 +25,7 @@ def parse_args():
parser.add_argument("--width", type=int, default=448, help="Video width.")
parser.add_argument("--num-inference-steps", type=int, default=8, help="Number of denoising steps.")
parser.add_argument("--seed", type=int, default=52, help="Random seed for generation.")
return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


def main():
Expand All @@ -33,6 +34,7 @@ def main():
print(f"Initializing MagiHuman pipeline with TP={args.tensor_parallel_size}...")
omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
init_timeout=1200,
tensor_parallel_size=args.tensor_parallel_size,
devices=list(range(args.tensor_parallel_size)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from vllm.multimodal.image import convert_image_mode

from vllm_omni import Omni
from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides

DEFAULT_SYSTEM = "You are a helpful assistant."
DEFAULT_QUESTION = "Please summarize the content of this image."
Expand Down Expand Up @@ -48,7 +49,7 @@ def parse_args() -> argparse.Namespace:
action="store_true",
help="Enable diffusion pipeline profiler to display stage durations.",
)
return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


def build_prompt(system: str, question: str) -> str:
Expand All @@ -73,6 +74,7 @@ def main() -> None:

omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
stage_configs_path=args.stage_config,
trust_remote_code=args.trust_remote_code,
enable_diffusion_pipeline_profiler=args.enable_diffusion_pipeline_profiler,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from vllm.sampling_params import SamplingParams

from vllm_omni import Omni
from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -117,7 +118,7 @@ def parse_args() -> argparse.Namespace:
)
p.add_argument("--out", type=str, default="output.png", help="Path to save the generated image.")
p.add_argument("--trust-remote-code", action="store_true", help="Trust remote code when loading the model.")
args = p.parse_args()
args = parse_args_with_explicit_overrides(p)
if not args.prompt:
args.prompt = ["A stylish woman with sunglasses riding a motorcycle in NYC."]
return args
Expand Down Expand Up @@ -194,7 +195,12 @@ def main() -> None:
expected_grid_tokens = ar_height * (ar_width + 1)

logger.info("Initializing Omni pipeline...")
omni = Omni(model=args.model, stage_configs_path=args.stage_config, trust_remote_code=args.trust_remote_code)
omni = Omni(
model=args.model,
explicit_overrides=getattr(args, "_explicit_overrides", None),
stage_configs_path=args.stage_config,
trust_remote_code=args.trust_remote_code,
)
try:
ar_sampling = SamplingParams(
temperature=1.0,
Expand Down
4 changes: 3 additions & 1 deletion examples/offline_inference/mimo_audio/end2end.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from vllm import SamplingParams
from vllm.utils.argparse_utils import FlexibleArgumentParser

from vllm_omni.entrypoints.utils import parse_args_with_explicit_overrides
from vllm_omni.entrypoints.omni import Omni
from vllm_omni.inputs.data import OmniTokensPrompt

Expand Down Expand Up @@ -182,6 +183,7 @@ def main(args):

omni = Omni(
model=model_name,
explicit_overrides=getattr(args, "_explicit_overrides", None),
stage_configs_path=args.stage_configs_path,
log_stats=args.enable_stats,
log_file=("omni_pipeline.log" if args.enable_stats else None),
Expand Down Expand Up @@ -434,7 +436,7 @@ def parse_args():
help="Path to a stage configs file.",
)

return parser.parse_args()
return parse_args_with_explicit_overrides(parser)


if __name__ == "__main__":
Expand Down
Loading
Loading