Skip to content
Merged
Show file tree
Hide file tree
Changes from 106 commits
Commits
Show all changes
112 commits
Select commit Hold shift + click to select a range
7f13f23
init
mickqian Oct 25, 2025
bbead9b
update pyproject.toml
mickqian Oct 25, 2025
c0d2b00
init cli (in progress)
mickqian Oct 25, 2025
4bda859
update pre-commit
mickqian Oct 25, 2025
d253519
replace path prefix from sgl-diffusion to sglang.multimodal_gen
mickqian Oct 25, 2025
9dec467
update precommit config
mickqian Oct 25, 2025
4f15b76
feat: generate command
mickqian Oct 25, 2025
721033e
doc: update README about cli
mickqian Oct 25, 2025
8a18b33
doc: update README about ack
mickqian Oct 25, 2025
bdbcb92
doc: integrate csrc component and its submodules into the project
mickqian Oct 25, 2025
57cdc2b
fix: fix tk path
mickqian Oct 25, 2025
8740f6a
Pin ThunderKittens submodules to revision 6c27e28
mickqian Oct 25, 2025
b404f64
chore: upgrade openai to 2.6.1
mickqian Oct 25, 2025
fc5bcf1
chore: sync commits from sgl-diffusion
mickqian Oct 25, 2025
89d0aa0
multimodal_gen http server supports i2v
yhyang201 Oct 25, 2025
45e7f4c
fix lint
yhyang201 Oct 25, 2025
ccaf154
fix
yhyang201 Oct 26, 2025
9f69306
fix: fix attn backend
mickqian Oct 26, 2025
8afc45c
bugfix
Oct 26, 2025
e97e143
fix hunyuan[tmp]
yizhang2077 Oct 26, 2025
3e707a0
small fix
yizhang2077 Oct 26, 2025
f130039
model: support Qwen-Image-Edit
mickqian Oct 26, 2025
f5bf7ab
clean
mickqian Oct 26, 2025
b232abb
tiny fix
yizhang2077 Oct 27, 2025
1d3066d
fix: fix dit on cpu when forwarding
mickqian Oct 27, 2025
8c8aace
doc: update support_matrix.md
mickqian Oct 27, 2025
a9e2a1a
perf: improve qwen-image
mickqian Oct 27, 2025
9a4f3b8
fix fused_add_rmsnorm shape
JustinTong0323 Oct 28, 2025
beb940d
lint
JustinTong0323 Oct 28, 2025
206ec93
minor fix prepare_neg_cond_kwargs
mickqian Oct 28, 2025
26701c5
skip empty text_encoder_extra_arg
JustinTong0323 Oct 28, 2025
fb51b10
fix rope_emb.forward_from_grid
JustinTong0323 Oct 28, 2025
9c01676
add copyright
mickqian Oct 28, 2025
ff7fd59
remove markdown copyright
mickqian Oct 28, 2025
887a158
wip hunyuan
JustinTong0323 Oct 28, 2025
4c55f26
remove markdown copyright
mickqian Oct 28, 2025
1c28b4b
fix: prepare_pos_cond_kwargs
mickqian Oct 28, 2025
3bc5137
fix fastwan2.1
yizhang2077 Oct 28, 2025
b4d3409
export THUNDERKITTENS_ROOT=/sgl-workspace/sglang/python/sglang/multim…
mickqian Oct 28, 2025
9fbb612
update install.md
mickqian Oct 28, 2025
51f7dd5
fix typo
mickqian Oct 28, 2025
497d72f
update test threshold
mickqian Oct 28, 2025
501cc14
fix: fix image encoding
mickqian Oct 28, 2025
5841b9f
fix: fix 'fix image encoding'
mickqian Oct 29, 2025
374c09c
replace RMS and cleanup
JustinTong0323 Oct 29, 2025
248dfba
Merge pull request #3 from sglang-bot/hunyuan
JustinTong0323 Oct 29, 2025
bde9a6c
fix stepvideo
yizhang2077 Oct 29, 2025
e0ab554
support image edit
yhyang201 Oct 29, 2025
7c8a644
remove print log
ispobock Oct 29, 2025
07ef784
cleanup wan
ispobock Oct 29, 2025
ff5b0d6
fix lint
ispobock Oct 29, 2025
aeac2a5
move config
ispobock Oct 30, 2025
fec9570
move config
ispobock Oct 30, 2025
2a53448
update
yhyang201 Oct 30, 2025
9d25152
fix ulysses
mickqian Oct 30, 2025
e7a58b9
improve
mickqian Oct 30, 2025
c3a1cc9
remove HAS_LONG_CTX and HAS_FLASH_ATTENTION
mickqian Oct 30, 2025
435cbc1
fix wrong comment
mickqian Oct 30, 2025
aad40fc
fix ring attn
yhyang201 Oct 30, 2025
69b4be1
fix usp
mickqian Oct 30, 2025
76f19e5
refactor
mickqian Oct 30, 2025
bbb3bc1
fix usp
mickqian Oct 30, 2025
1217794
refactor
mickqian Oct 30, 2025
26a97d3
Merge remote-tracking branch 'diffusion/sgl-diffusion' into sp
mickqian Oct 30, 2025
ee6d12a
fix ring attn
yhyang201 Oct 30, 2025
f00e9ad
remove from dev
ispobock Oct 30, 2025
fb83d72
format pyproject
ispobock Oct 30, 2025
df47719
test: upload ti2v perf
mickqian Oct 31, 2025
7e81fad
input validation: resize
mickqian Oct 31, 2025
dcb87d6
fix wani2v resize image
mickqian Oct 30, 2025
08c15ab
auto set ulysses=sp when not specified
mickqian Oct 31, 2025
1bf4e68
simplify wani2v judgement
mickqian Oct 31, 2025
b1b015e
wani2v
mickqian Oct 30, 2025
7b88fa1
fix illegal mem access for Wan2_2 TI2V
mickqian Oct 31, 2025
ea0a80f
use sta & vsa from pypi
mickqian Oct 31, 2025
fa3af1d
add pytest to pyproject.toml (required by vsa)
mickqian Oct 31, 2025
f7d42d7
update test
mickqian Oct 31, 2025
edd125b
remove debug
mickqian Oct 31, 2025
edb4e61
change supported_attentions' type from tuple to set
mickqian Oct 31, 2025
bf81875
fix: add @property back to temporal_compression_ratio
mickqian Oct 31, 2025
a3ad647
test: refactor generate tests
mickqian Nov 1, 2025
f3877f9
refactor: refactor attention_backends
mickqian Nov 1, 2025
0a9f5be
refactor: remove sta & vsa
mickqian Nov 1, 2025
dd7a1f1
doc: update support matrix
mickqian Nov 1, 2025
cf98e86
Merge remote-tracking branch 'origin/main' into diffusion-test
mickqian Nov 1, 2025
d2462b1
update pre-commit
mickqian Nov 1, 2025
6ba441f
clean
mickqian Nov 1, 2025
be56bf3
update test
mickqian Nov 1, 2025
405edbb
combine modulate triton kernels
mickqian Nov 1, 2025
b4320a7
update pyproject.toml format
mickqian Nov 2, 2025
367063b
remove gitmodules
mickqian Nov 2, 2025
0a627f2
simplify cli launch_server
mickqian Nov 2, 2025
4e394d0
make get_is_diffusion_model more robust by downloading config files f…
mickqian Nov 2, 2025
67d4dea
run_server for text models
mickqian Nov 2, 2025
8b9e8a3
refactor qwen-image-edit's ImageVAEEncoding
mickqian Nov 2, 2025
444f068
add save file name sanitize
mickqian Nov 2, 2025
dea29b5
update doc
mickqian Nov 2, 2025
40cea2a
Merge branch 'main' into diffusion
mickqian Nov 2, 2025
4de70ac
update pyproject.toml
mickqian Nov 2, 2025
ab81b0a
update Dockerfile.diffusion
mickqian Nov 2, 2025
c2191ef
update install.md
mickqian Nov 2, 2025
cb4b615
fix pyproject.toml
mickqian Nov 3, 2025
7721076
remove comments & enable serve ci
yhyang201 Nov 3, 2025
d7e98fe
Merge branch 'main' into diffusion
mickqian Nov 3, 2025
51cb7c0
fix wani2v resize image
mickqian Oct 30, 2025
998bdf7
update doc
mickqian Nov 3, 2025
70ee62b
typo
mickqian Nov 3, 2025
1f57577
cleanup
mickqian Nov 3, 2025
0a08544
more
mickqian Nov 3, 2025
5fd2188
fix
mickqian Nov 3, 2025
6e296f0
fix
mickqian Nov 3, 2025
8a0300b
fix
mickqian Nov 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
default_stages: [pre-commit, pre-push, manual]
exclude: ^python/sglang/multimodal_gen/csrc

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down Expand Up @@ -31,7 +32,15 @@ repos:
- --select=F401,F821
- --fix
files: ^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*)
exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
exclude: |
(?x)^(
.*/__init__\.py$|
.*\.ipynb$|
python/sglang/srt/grpc/.*_pb2\.py$|
python/sglang/srt/grpc/.*_pb2_grpc\.py$|
python/sglang/srt/grpc/.*_pb2\.pyi$|
python/sglang/srt/grpc/.*_pb2_grpc\.pyi$|
)$
- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
Expand Down
104 changes: 104 additions & 0 deletions docker/Dockerfile.diffusion
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive

SHELL ["/bin/bash", "-c"]

WORKDIR /sgl-workspace/sglang

RUN apt-get update && apt-get install -y --no-install-recommends \
wget \
git \
ca-certificates \
openssh-server \
zsh \
vim \
curl \
gcc-11 \
g++-11 \
clang-11 \
libnuma1 libnuma-dev \
&& rm -rf /var/lib/apt/lists/*

# Install oh-my-zsh and plugins
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended \
&& git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions \
&& git clone https://github.com/zsh-users/zsh-syntax-highlighting.git ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting


# Set up C++20 compilers for ThunderKittens
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 --slave /usr/bin/g++ g++ /usr/bin/g++-11

# Set CUDA environment variables
ENV CUDA_HOME=/usr/local/cuda-12.8
ENV PATH=${CUDA_HOME}/bin:${PATH}
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:$LD_LIBRARY_PATH

# Install uv and source its environment
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
echo 'source $HOME/.local/bin/env' >> /root/.zshrc

# Copy just the pyproject.toml first to leverage Docker cache
COPY python/pyproject.toml python/

# Create a dummy README to satisfy the installation
RUN mkdir -p python && echo "# Placeholder" > python/README.md

# Create and activate virtual environment with specific Python version and seed
RUN source $HOME/.local/bin/env && \
uv venv --python 3.12 --seed /opt/venv && \
source /opt/venv/bin/activate && \
uv pip install nvitop && \
uv pip install --no-cache-dir --upgrade pip && \
uv pip install --no-cache-dir --prerelease=allow./python[diffusion]

COPY . .

# Install dependencies using uv and set up shell configuration
RUN source $HOME/.local/bin/env && \
source /opt/venv/bin/activate && \
git config --unset-all http.https://github.com/.extraheader || true && \
echo 'source /opt/venv/bin/activate' >> /root/.zshrc && \
echo 'if [ -n "$ZSH_VERSION" ] && [ -f ~/.zshrc ]; then . ~/.zshrc; elif [ -f ~/.bashrc ]; then . ~/.bashrc; fi' > /root/.profile

# Set PATH to include venv bin
ENV PATH=/opt/venv/bin:$PATH

# Configure zsh
COPY --chown=root:root <<-"EOF" /root/.zshrc
export ZSH="/root/.oh-my-zsh"

source $HOME/.local/bin/env
source /opt/venv/bin/activate

## Theme
ZSH_THEME="robbyrussell"

## Plugins
plugins=(
git
z
zsh-autosuggestions
zsh-syntax-highlighting
)

source $ZSH/oh-my-zsh.sh

## Aliases
alias ll='ls -alF'
alias la='ls -A'
alias l='ls -CF'
alias vi='vim'

## Enhanced history
HISTSIZE=10000
SAVEHIST=10000
setopt HIST_IGNORE_ALL_DUPS
setopt HIST_FIND_NO_DUPS
setopt INC_APPEND_HISTORY
EOF


EXPOSE 22

CMD ["/bin/zsh"]
22 changes: 22 additions & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,25 @@ dependencies = [

[project.optional-dependencies]
checkpoint-engine = ["checkpoint-engine==0.1.2"]
diffusion = [
"diffusers==0.35.2",
"yunchang==0.6.3.post1",
"opencv-python==4.10.0.84",
"imageio==2.36.0",
"imageio-ffmpeg==0.5.1",
"PyYAML==6.0.1",
"moviepy>=2.0.0",
"cloudpickle",
"remote-pdb",
"torchcodec==0.5.0",
"st_attn ==0.0.7",
"vsa==0.0.4",
]

[tool.uv.extra-build-dependencies]
st-attn = ["torch", "setuptools"]
vsa = ["torch", "setuptools"]

test = [
"accelerate",
"expecttest",
Expand All @@ -103,6 +122,9 @@ tracing = [
"Homepage" = "https://github.com/sgl-project/sglang"
"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"

[project.scripts]
sglang = "sglang.cli.main:main"

[tool.setuptools.package-data]
"sglang" = [
"srt/layers/moe/fused_moe_triton/configs/*/*.json",
Expand Down
Empty file added python/sglang/cli/__init__.py
Empty file.
21 changes: 21 additions & 0 deletions python/sglang/cli/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import argparse

from sglang.cli.main import get_is_diffusion_model, get_model_path
from sglang.multimodal_gen.runtime.entrypoints.cli.generate import (
add_multimodal_gen_generate_args,
generate_cmd,
)


def generate(args, extra_argv):
    """Entry point of the ``generate`` subcommand.

    Looks up ``--model-path`` in ``extra_argv`` and checks whether the model
    is a diffusion model. If it is, ``extra_argv`` is parsed with the
    multimodal-generation argument parser and handed to ``generate_cmd``.

    Args:
        args: Namespace produced by the top-level parser (not used here).
        extra_argv: Command-line arguments the top-level parser did not
            consume.

    Raises:
        Exception: If the model is not recognized as a diffusion model.
    """
    model_path = get_model_path(extra_argv)

    # Only diffusion models are handled by this subcommand.
    if not get_is_diffusion_model(model_path):
        raise Exception(
            f"Generate subcommand is not supported for model: {model_path} for now"
        )

    cli_parser = argparse.ArgumentParser(description="SGLang Multimodal Generation")
    add_multimodal_gen_generate_args(cli_parser)
    generate_cmd(cli_parser.parse_args(extra_argv))
180 changes: 180 additions & 0 deletions python/sglang/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
import argparse
import hashlib
import json
import logging
import os
import tempfile
from typing import Optional

import filelock
from huggingface_hub import hf_hub_download

logger = logging.getLogger(__name__)

temp_dir = tempfile.gettempdir()


def _get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
    """Build a file lock keyed on a model name or path.

    Args:
        model_name_or_path: Model identifier. Every "/" is replaced with "-"
            so the value can be embedded in a file name.
        cache_dir: Directory that will hold the lock file. When falsy, the
            module-level ``temp_dir`` is used instead.

    Returns:
        A ``filelock.FileLock`` pointing at
        ``<lock_dir>/<sha256 hex digest><model name>.lock``. The lock is only
        constructed here; the caller is responsible for acquiring it.
    """
    lock_dir = cache_dir or temp_dir
    # Create the lock directory itself. Creating only its parent
    # (os.path.dirname(lock_dir)) leaves a missing cache_dir missing, and
    # fails outright when lock_dir is a single relative component because
    # the dirname is then the empty string.
    os.makedirs(lock_dir, exist_ok=True)
    model_name = model_name_or_path.replace("/", "-")
    hash_name = hashlib.sha256(model_name.encode()).hexdigest()
    # add hash to avoid conflict with old users' lock files
    lock_file_name = hash_name + model_name + ".lock"
    # mode 0o666 is required for the filelock to be shared across users
    lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666)
    return lock


# Copied and adapted from hf_diffusers_utils.py
def _maybe_download_model(
model_name_or_path: str, local_dir: str | None = None, download: bool = True
) -> str:
"""
Resolve a model path. If it's a local directory, return it.
If it's a Hugging Face Hub ID, download only the config file
(`model_index.json` or `config.json`) and return its directory.

Args:
model_name_or_path: Local path or Hugging Face Hub model ID
local_dir: Local directory to save the downloaded file (if any)
download: Whether to download from Hugging Face Hub when needed

Returns:
Local directory path that contains the downloaded config file, or the original local directory.
"""

if os.path.exists(model_name_or_path):
logger.info("Model already exists locally")
return model_name_or_path

if not download:
return model_name_or_path

with _get_lock(model_name_or_path):
# Try `model_index.json` first (diffusers models)
try:
logger.info(
"Downloading model_index.json from HF Hub for %s...",
model_name_or_path,
)
file_path = hf_hub_download(
repo_id=model_name_or_path,
filename="model_index.json",
local_dir=local_dir,
)
logger.info("Downloaded to %s", file_path)
return os.path.dirname(file_path)
except Exception as e_index:
logger.debug("model_index.json not found or failed: %s", e_index)

# Fallback to `config.json`
try:
logger.info(
"Downloading config.json from HF Hub for %s...", model_name_or_path
)
file_path = hf_hub_download(
repo_id=model_name_or_path,
filename="config.json",
local_dir=local_dir,
)
logger.info("Downloaded to %s", file_path)
return os.path.dirname(file_path)
except Exception as e_config:
raise ValueError(
(
"Could not find model locally at %s and failed to download "
"model_index.json/config.json from HF Hub: %s"
)
% (model_name_or_path, e_config)
) from e_config


# Copied and adapted from hf_diffusers_utils.py
def _verify_model_config_and_directory(model_path: str) -> True:
"""
Verify if the model directory contains a valid diffusers configuration.

Args:
model_path: Path to the model directory

Returns:
The loaded model configuration as a dictionary if the model is a diffusers model
None if the model is not a diffusers model
"""

# Prefer model_index.json which indicates a diffusers pipeline
config_path = os.path.join(model_path, "model_index.json")
if not os.path.exists(config_path):
return None

# Load the config
with open(config_path) as f:
config = json.load(f)

# Verify diffusers version exists
if "_diffusers_version" not in config:
return None
return True


def get_is_diffusion_model(model_path: str):
    """Tell whether ``model_path`` refers to a diffusers model.

    The path is first resolved with ``_maybe_download_model`` and the
    resulting directory is then checked with
    ``_verify_model_config_and_directory``, whose result (True, or None when
    the model is not a diffusers model) is returned as is.
    """
    resolved_dir = _maybe_download_model(model_path)
    is_diffusion = _verify_model_config_and_directory(resolved_dir)
    return is_diffusion


def get_model_path(extra_argv):
    """Extract the value of ``--model-path`` from a raw argument list.

    Both ``--model-path VALUE`` and ``--model-path=VALUE`` are recognized;
    the first usable occurrence wins.

    Args:
        extra_argv: List of command-line argument strings.

    Returns:
        The model path string.

    Raises:
        Exception: If no model path is present. The message is a usage text
            when ``-h``/``--help`` is among the arguments, and an error text
            otherwise.
    """
    model_path = None
    last_index = len(extra_argv) - 1
    for position, token in enumerate(extra_argv):
        if token.startswith("--model-path="):
            # Inline form: everything after the first "=" is the value.
            model_path = token.partition("=")[2]
            break
        if token == "--model-path" and position < last_index:
            # Separated form: the value is the next argument.
            model_path = extra_argv[position + 1]
            break

    if model_path is not None:
        return model_path

    # Fallback for --help or other cases where model-path is not provided
    if "-h" in extra_argv or "--help" in extra_argv:
        raise Exception(
            "Usage: sglang serve --model-path <model-name-or-path> [additional-arguments]\n\n"
            "This command can launch either a standard language model server or a diffusion model server.\n"
            "The server type is determined by the model path.\n"
            "For specific arguments, please provide a model_path."
        )
    raise Exception(
        "Error: --model-path is required. " "Please provide the path to the model."
    )


def main():
    """Command-line entry point: dispatch to the chosen subcommand.

    Registers the ``serve`` and ``generate`` subcommands, parses only the
    arguments the top-level parser knows about, and forwards the remaining
    ones to the selected subcommand handler as ``extra_argv``.
    """
    # The handlers are imported inside the function rather than at module
    # level; sglang.cli.generate itself imports from this module.
    from sglang.cli.serve import serve
    from sglang.cli.generate import generate

    subcommands = (
        ("serve", "Launch the SGLang server.", serve),
        ("generate", "Run inference on a multimodal model.", generate),
    )

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="subcommand", required=True)
    for command_name, help_text, handler in subcommands:
        command_parser = subparsers.add_parser(
            command_name,
            help=help_text,
            add_help=False,  # Defer help to the specific parser
        )
        command_parser.set_defaults(func=handler)

    known_args, extra_argv = parser.parse_known_args()
    known_args.func(known_args, extra_argv)
Loading
Loading