Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/models/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ th {
|`Flux2Pipeline` | FLUX.2-dev | `black-forest-labs/FLUX.2-dev` |
|`FishSpeechSlowARForConditionalGeneration` | Fish Speech S2 Pro | `fishaudio/s2-pro` |
|`DreamIDOmniPipeline`| DreamID-Omni | `XuGuo699/DreamID-Omni` |
|`LingbotWorldPipeline` | LingBot-World Base (Cam) | `robbyant/lingbot-world-base-cam` |


## List of Supported Models for NPU
Expand Down
56 changes: 56 additions & 0 deletions examples/offline_inference/lingbot-world/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# LingBot-World

Offline LingBot-World examples.

## Download

```bash
python download_lingbot_world.py \
--model-id robbyant/lingbot-world-base-cam \
--output-dir ./lingbot-world-base-cam
```

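In addition to downloading the weights, the script clones the LingBot-World repository into `vllm-omni-dependency/lingbot-world` under the system temporary directory (for example `/tmp/vllm-omni-dependency/lingbot-world`, which the commands below rely on) and registers that checkout in the active environment's `site-packages` through a `.pth` file.

The prepared model directory looks like this: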

```text
lingbot-world-base-cam/
├── configuration.json
├── google/
├── high_noise_model/
├── low_noise_model/
├── models_t5_umt5-xxl-enc-bf16.pth
├── model_index.json
└── Wan2.1_VAE.pth
```

## Run With Control Signals

```bash
PROMPT="$(cat /tmp/vllm-omni-dependency/lingbot-world/examples/00/prompt.txt)"

python image_to_video.py \
--model ./lingbot-world-base-cam \
--image /tmp/vllm-omni-dependency/lingbot-world/examples/00/image.jpg \
--action-path /tmp/vllm-omni-dependency/lingbot-world/examples/00 \
--prompt "$PROMPT" \
--output lingbot_world_base_cam_examples00.mp4
```

## Run Without Control Signals

```bash
PROMPT="$(cat /tmp/vllm-omni-dependency/lingbot-world/examples/00/prompt.txt)"

python image_to_video.py \
--model ./lingbot-world-base-cam \
--image /tmp/vllm-omni-dependency/lingbot-world/examples/00/image.jpg \
--prompt "$PROMPT" \
--output lingbot_world_base_cam_no_control.mp4
```

## Notes

- `--action-path` is optional; omit it to run without control signals.
- For `LingBot-World-Base (Cam)`, the `--action-path` directory should contain `poses.npy` and `intrinsics.npy`.
- For `LingBot-World-Base (Act)`, `action.npy` is also required.
- `--enable-cpu-offload` is supported for offline inference.
147 changes: 147 additions & 0 deletions examples/offline_inference/lingbot-world/download_lingbot_world.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from __future__ import annotations

import argparse
import fcntl
import json
import os
import site
import subprocess
import tempfile
import time
from pathlib import Path

try:
from huggingface_hub import snapshot_download
except ImportError:
snapshot_download = None


# Defaults for the command-line options and for the generated model_index.json.
DEFAULT_MODEL_ID = "robbyant/lingbot-world-base-cam"
DEFAULT_OUTPUT_DIR = "./lingbot-world-base-cam"
DEFAULT_CLASS_NAME = "LingbotWorldPipeline"
# Git repository (and branch) that download_dependency() clones.
DEPENDENCY_REPO = "https://github.com/robbyant/lingbot-world.git"
DEPENDENCY_BRANCH = "main"
# The clone lives under the system temporary directory; LOCK_FILE is the file
# download_dependency() takes an exclusive flock on while cloning.
CACHE_DIR = Path(tempfile.gettempdir()) / "vllm-omni-dependency"
LOCK_FILE = CACHE_DIR / ".lingbot_world_install.lock"
DEPENDENCY_DIR = CACHE_DIR / "lingbot-world"
# Name of the .pth file written into site-packages; its content is DEPENDENCY_DIR.
PTH_FILE_NAME = "vllm_omni_lingbot_world_dependency.pth"

# Paths, relative to the model directory, that validate_model_directory()
# requires to exist.
REQUIRED_FILES = (
"configuration.json",
"models_t5_umt5-xxl-enc-bf16.pth",
"Wan2.1_VAE.pth",
"low_noise_model/config.json",
"high_noise_model/config.json",
)


def infer_control_type(model_ref: str) -> str:
    """Guess the LingBot-World control type from a model ID or path.

    The reference is lower-cased and split on every non-alphanumeric
    character. The result is ``"act"`` only when ``act`` appears as a
    standalone token (e.g. ``lingbot-world-base-act``); a plain substring
    test would also fire on unrelated names such as ``practice`` or
    ``exact``, which is a real risk when the reference is a filesystem path.

    Args:
        model_ref: Hugging Face model ID or local path.

    Returns:
        ``"act"`` if an ``act`` token is present, otherwise ``"cam"``.
    """
    # Turn separators ("/", "-", "_", "(", ")", ".", spaces, ...) into spaces
    # so that str.split() yields clean alphanumeric tokens.
    normalized = "".join(ch if ch.isalnum() else " " for ch in model_ref.lower())
    if "act" in normalized.split():
        return "act"
    return "cam"


def ensure_model_index(
    output_dir: Path,
    *,
    class_name: str = DEFAULT_CLASS_NAME,
    control_type: str | None = None,
) -> Path:
    """Write ``model_index.json`` into ``output_dir`` and return its path.

    The directory is created if needed and any existing index file is
    overwritten.

    Args:
        output_dir: Model directory that receives the index file.
        class_name: Value stored under the ``_class_name`` key.
        control_type: Value stored under the ``control_type`` key. When
            falsy, it is inferred from the string form of ``output_dir``.

    Returns:
        Path of the written ``model_index.json``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    index_file = output_dir / "model_index.json"

    resolved_control = control_type if control_type else infer_control_type(str(output_dir))
    serialized = json.dumps(
        {"_class_name": class_name, "control_type": resolved_control},
        indent=2,
    )

    index_file.write_text(serialized + "\n")
    return index_file


def validate_model_directory(output_dir: Path) -> None:
    """Check that every entry of ``REQUIRED_FILES`` exists under ``output_dir``.

    Args:
        output_dir: Prepared model directory to inspect.

    Raises:
        FileNotFoundError: If one or more required paths are absent; the
            message lists them in sorted order.
    """
    absent = []
    for relative in REQUIRED_FILES:
        candidate = output_dir / relative
        if not candidate.exists():
            absent.append(relative)

    if not absent:
        return

    raise FileNotFoundError("LingBot-World download is incomplete. Missing files: " + ", ".join(sorted(absent)))


def download_dependency() -> Path:
    """Clone the LingBot-World repository and register it via a ``.pth`` file.

    The clone is a shallow checkout of ``DEPENDENCY_BRANCH`` into
    ``DEPENDENCY_DIR`` and is skipped when that directory already exists.
    An exclusive ``flock`` on ``LOCK_FILE`` is held while checking and
    cloning. Afterwards a ``.pth`` file containing ``DEPENDENCY_DIR`` is
    written into the first directory reported by ``site.getsitepackages()``.

    Returns:
        Path of the written ``.pth`` file.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
        OSError: If the lock file or the ``.pth`` file cannot be written.
    """
    # Function-scope import: only needed to clean up after a failed clone.
    import shutil

    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    with open(LOCK_FILE, "w") as lock_file:
        fcntl.flock(lock_file, fcntl.LOCK_EX)
        try:
            if not DEPENDENCY_DIR.exists():
                print(f"Downloading LingBot-World to {DEPENDENCY_DIR} ...")
                try:
                    subprocess.run(
                        [
                            "git",
                            "clone",
                            "--depth",
                            "1",
                            "--branch",
                            DEPENDENCY_BRANCH,
                            DEPENDENCY_REPO,
                            str(DEPENDENCY_DIR),
                        ],
                        check=True,
                    )
                except BaseException:
                    # A failed or interrupted clone can leave a partial
                    # checkout behind. Remove it so the next run does not see
                    # the directory and silently skip cloning.
                    shutil.rmtree(DEPENDENCY_DIR, ignore_errors=True)
                    raise
                print("Download finished.")
        finally:
            fcntl.flock(lock_file, fcntl.LOCK_UN)

    # NOTE(review): the registered path lives under the system temporary
    # directory, which may be cleaned up or writable by other users — confirm
    # this is acceptable for the target environments.
    site_packages = Path(site.getsitepackages()[0])
    pth_file = site_packages / PTH_FILE_NAME
    pth_file.write_text(f"{DEPENDENCY_DIR}\n", encoding="utf-8")
    print(f"Added {DEPENDENCY_DIR} to site-packages via {pth_file}")
    return pth_file


def timed_download(repo_id: str, local_dir: str) -> None:
    """Download a Hugging Face snapshot into ``local_dir`` and report the time.

    Nothing is downloaded when ``local_dir`` already exists; that check runs
    before the ``huggingface_hub`` availability check, so an existing
    directory never triggers the import error.

    Args:
        repo_id: Hugging Face repository ID to download.
        local_dir: Destination directory for the snapshot.

    Raises:
        ImportError: If a download is needed but ``huggingface_hub`` could
            not be imported.
    """
    if os.path.exists(local_dir):
        print(f"Directory {local_dir} already exists. Skipping download.")
        return
    if snapshot_download is None:
        raise ImportError(
            "huggingface_hub is required to download LingBot-World. Install it before running this script."
        )
    print(f"Starting download from {repo_id} into {local_dir}")
    # perf_counter() is unaffected by system clock adjustments, unlike the
    # wall-clock time.time(), so the reported duration cannot jump or go
    # negative during a long download.
    start_time = time.perf_counter()

    snapshot_download(
        repo_id=repo_id,
        local_dir=local_dir,
        local_dir_use_symlinks=False,
    )

    elapsed = time.perf_counter() - start_time
    print(f"Finished downloading {repo_id} in {elapsed:.2f} seconds. Files saved at: {local_dir}")


def download_lingbot_world(model_id: str, output_dir: Path) -> Path:
    """Download the model, write its ``model_index.json`` and validate it.

    Args:
        model_id: Hugging Face model ID; also used to infer the control type.
        output_dir: Directory that receives the prepared model.

    Returns:
        ``output_dir``, unchanged.
    """
    destination = str(output_dir)
    timed_download(repo_id=model_id, local_dir=destination)

    # The control type comes from the model ID, not from the output path.
    inferred_control = infer_control_type(model_id)
    ensure_model_index(output_dir, control_type=inferred_control)

    validate_model_directory(output_dir)
    return output_dir


def parse_args() -> argparse.Namespace:
    """Parse the command-line options ``--model-id`` and ``--output-dir``.

    Returns:
        Namespace with ``model_id`` and ``output_dir`` attributes, each
        falling back to its module-level default.
    """
    cli = argparse.ArgumentParser(description="Download LingBot-World from Hugging Face.")

    # (flag, default, help) triples, registered in declaration order.
    option_specs = (
        ("--model-id", DEFAULT_MODEL_ID, "Hugging Face model ID to download."),
        ("--output-dir", DEFAULT_OUTPUT_DIR, "Local directory for the prepared model."),
    )
    for flag, fallback, description in option_specs:
        cli.add_argument(flag, default=fallback, help=description)

    return cli.parse_args()


def main(output_dir: str, model_id: str = DEFAULT_MODEL_ID) -> None:
    """Prepare the model directory, then install the code dependency.

    Args:
        output_dir: Destination directory; ``~`` is expanded and the path is
            resolved to an absolute one before use.
        model_id: Hugging Face model ID to download.
    """
    resolved_dir = Path(output_dir).expanduser().resolve()
    model_dir = download_lingbot_world(model_id, resolved_dir)
    download_dependency()
    print(f"Prepared LingBot-World model at: {model_dir}")


# Script entry point: parse the CLI options and run the full preparation.
if __name__ == "__main__":
    cli_args = parse_args()
    main(output_dir=cli_args.output_dir, model_id=cli_args.model_id)
Loading