sgl-project · merrymercy · Nov 5, 2025 · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,4 +1,5 @@
 default_stages: [pre-commit, pre-push, manual]
+exclude: ^python/sglang/multimodal_gen/csrc
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -31,7 +32,15 @@ repos:
           - --select=F401,F821
           - --fix
         files: ^(benchmark/|docs/|examples/|python/sglang/|sgl-router/py_*)
-        exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
+        # Verbose-mode regex: one alternative per line. The last alternative
+        # must not end with `|`, otherwise an empty alternative is added.
+        exclude: |
+          (?x)^(
+          .*/__init__\.py$|
+          .*\.ipynb$|
+          python/sglang/srt/grpc/.*_pb2\.py$|
+          python/sglang/srt/grpc/.*_pb2_grpc\.py$|
+          python/sglang/srt/grpc/.*_pb2\.pyi$|
+          python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
+          )$
   - repo: https://github.com/psf/black
     rev: 24.10.0
     hooks:

@@ -0,0 +1,104 @@
+# syntax=docker/dockerfile:1
+# Development image: CUDA 12.8 toolchain, zsh/oh-my-zsh, uv, and a Python 3.12
+# virtual environment with the `diffusion` extra of ./python installed.
+# The syntax directive above is needed for the `COPY <<EOF` heredoc below.
+FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# pipefail makes a failed download in `curl ... | sh` abort the build instead
+# of being masked by the exit status of the last command in the pipeline.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+WORKDIR /sgl-workspace/sglang
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    clang-11 \
+    curl \
+    g++-11 \
+    gcc-11 \
+    git \
+    libnuma-dev \
+    libnuma1 \
+    openssh-server \
+    vim \
+    wget \
+    zsh \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install oh-my-zsh and plugins
+RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended \
+    && git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions \
+    && git clone https://github.com/zsh-users/zsh-syntax-highlighting.git ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting
+
+
+# Set up C++20 compilers for ThunderKittens
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 --slave /usr/bin/g++ g++ /usr/bin/g++-11
+
+# Set CUDA environment variables.
+# Kept as separate ENV instructions because the later ones expand CUDA_HOME.
+ENV CUDA_HOME=/usr/local/cuda-12.8
+ENV PATH=${CUDA_HOME}/bin:${PATH}
+ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:$LD_LIBRARY_PATH
+
+# Install uv. Its env script is sourced from the /root/.zshrc written by the
+# heredoc COPY further down, so nothing is appended to .zshrc here (an append
+# at this point would be overwritten by that COPY).
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Copy just the pyproject.toml first to leverage Docker cache
+COPY python/pyproject.toml python/
+
+# Create a dummy README to satisfy the installation
+# (the python/ directory already exists thanks to the COPY above).
+RUN echo "# Placeholder" > python/README.md
+
+# Create and activate virtual environment with specific Python version and seed
+RUN source $HOME/.local/bin/env && \
+    uv venv --python 3.12 --seed /opt/venv && \
+    source /opt/venv/bin/activate && \
+    uv pip install nvitop && \
+    uv pip install --no-cache-dir --upgrade pip && \
+    uv pip install --no-cache-dir --prerelease=allow ./python[diffusion]
+
+COPY . .
+
+# NOTE(review): the install above ran against a tree that contained only
+# pyproject.toml and a placeholder README, and nothing reinstalls the package
+# after the full source is copied. Confirm whether an (editable) install of
+# ./python is intended at this point.
+
+# Drop any GitHub auth header left in the copied repo's git config (best
+# effort) and make login shells load the interactive shell's rc file.
+RUN (git config --unset-all http.https://github.com/.extraheader || true) && \
+    echo 'if [ -n "$ZSH_VERSION" ] && [ -f ~/.zshrc ]; then . ~/.zshrc; elif [ -f ~/.bashrc ]; then . ~/.bashrc; fi' > /root/.profile
+
+# Set PATH to include venv bin
+ENV PATH=/opt/venv/bin:$PATH
+
+# Configure zsh. This replaces /root/.zshrc wholesale, including the one
+# generated by the oh-my-zsh installer.
+COPY --chown=root:root <<-"EOF" /root/.zshrc
+export ZSH="/root/.oh-my-zsh"
+
+source $HOME/.local/bin/env
+source /opt/venv/bin/activate
+
+## Theme
+ZSH_THEME="robbyrussell"
+
+## Plugins
+plugins=(
+    git
+    z
+    zsh-autosuggestions
+    zsh-syntax-highlighting
+)
+
+source $ZSH/oh-my-zsh.sh
+
+## Aliases
+alias ll='ls -alF'
+alias la='ls -A'
+alias l='ls -CF'
+alias vi='vim'
+
+## Enhanced history
+HISTSIZE=10000
+SAVEHIST=10000
+setopt HIST_IGNORE_ALL_DUPS
+setopt HIST_FIND_NO_DUPS
+setopt INC_APPEND_HISTORY
+EOF
+
+
+EXPOSE 22
+
+CMD ["/bin/zsh"]
@@ -80,6 +80,25 @@ dependencies = [
 
 [project.optional-dependencies]
 checkpoint-engine = ["checkpoint-engine==0.1.2"]
+diffusion = [
+    "diffusers==0.35.2",
+    "yunchang==0.6.3.post1",
+    "opencv-python==4.10.0.84",
+    "imageio==2.36.0",
+    "imageio-ffmpeg==0.5.1",
+    "PyYAML==6.0.1",
+    "moviepy>=2.0.0",
+    "cloudpickle",
+    "remote-pdb",
+    "torchcodec==0.5.0",
+    "st_attn ==0.0.7",
+    "vsa==0.0.4",
+]
+
+[tool.uv.extra-build-dependencies]
+st-attn = ["torch", "setuptools"]
+vsa = ["torch", "setuptools"]
+
 test = [
   "accelerate",
   "expecttest",
@@ -103,6 +122,9 @@ tracing = [
 "Homepage" = "https://github.com/sgl-project/sglang"
 "Bug Tracker" = "https://github.com/sgl-project/sglang/issues"
 
+# Console entry point: the `sglang` command invokes `main` in sglang/cli/main.py.
+[project.scripts]
+sglang = "sglang.cli.main:main"
+
 [tool.setuptools.package-data]
 "sglang" = [
   "srt/layers/moe/fused_moe_triton/configs/*/*.json",

diff --git a/python/sglang/cli/__init__.py b/python/sglang/cli/__init__.py
diff --git a/python/sglang/cli/generate.py b/python/sglang/cli/generate.py
@@ -0,0 +1,21 @@
+import argparse
+
+from sglang.cli.main import get_is_diffusion_model, get_model_path
+from sglang.multimodal_gen.runtime.entrypoints.cli.generate import (
+    add_multimodal_gen_generate_args,
+    generate_cmd,
+)
+
+
+def generate(args, extra_argv):
+    """Handle the ``generate`` subcommand.
+
+    The model path is taken from ``--model-path`` in ``extra_argv``. When the
+    model is detected as a diffusion model, ``extra_argv`` is parsed with the
+    multimodal-generation arguments and handed to ``generate_cmd``.
+
+    Args:
+        args: Namespace produced by the top-level parser (not used here).
+        extra_argv: Arguments the top-level parser did not consume.
+
+    Raises:
+        Exception: If the model is not detected as a diffusion model.
+    """
+    model_path = get_model_path(extra_argv)
+
+    # Guard clause: only diffusion models are supported by this subcommand.
+    if not get_is_diffusion_model(model_path):
+        raise Exception(
+            f"Generate subcommand is not supported for model: {model_path} for now"
+        )
+
+    mm_parser = argparse.ArgumentParser(description="SGLang Multimodal Generation")
+    add_multimodal_gen_generate_args(mm_parser)
+    generate_cmd(mm_parser.parse_args(extra_argv))
diff --git a/python/sglang/cli/main.py b/python/sglang/cli/main.py
@@ -0,0 +1,180 @@
+import argparse
+import hashlib
+import json
+import logging
+import os
+import tempfile
+from typing import Optional
+
+import filelock
+from huggingface_hub import hf_hub_download
+
+logger = logging.getLogger(__name__)
+
+temp_dir = tempfile.gettempdir()
+
+
+def _get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
+    """Build a file lock dedicated to ``model_name_or_path``.
+
+    Args:
+        model_name_or_path: Model identifier; ``/`` is replaced by ``-`` so it
+            can be used inside a file name.
+        cache_dir: Directory that holds the lock file. Falls back to the
+            module-level ``temp_dir`` when empty or ``None``.
+
+    Returns:
+        A ``filelock.FileLock`` for ``<lock_dir>/<sha256><model-name>.lock``.
+    """
+    lock_dir = cache_dir or temp_dir
+    # Create the lock directory itself. Creating only its parent would leave a
+    # missing ``cache_dir`` missing, and ``os.path.dirname`` of a bare relative
+    # name is "" which ``os.makedirs`` rejects.
+    os.makedirs(lock_dir, exist_ok=True)
+    model_name = model_name_or_path.replace("/", "-")
+    hash_name = hashlib.sha256(model_name.encode()).hexdigest()
+    # add hash to avoid conflict with old users' lock files
+    lock_file_name = hash_name + model_name + ".lock"
+    # mode 0o666 is required for the filelock to be shared across users
+    lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666)
+    return lock
+
+
+# Copied and adapted from hf_diffusers_utils.py
+def _maybe_download_model(
+    model_name_or_path: str, local_dir: Optional[str] = None, download: bool = True
+) -> str:
+    """
+    Resolve a model path. If it's a local directory, return it.
+    If it's a Hugging Face Hub ID, download only the config file
+    (`model_index.json` or `config.json`) and return its directory.
+
+    Args:
+        model_name_or_path: Local path or Hugging Face Hub model ID
+        local_dir: Local directory to save the downloaded file (if any)
+        download: Whether to download from Hugging Face Hub when needed
+
+    Returns:
+        Local directory path that contains the downloaded config file, or the
+        original ``model_name_or_path`` when it exists locally or when
+        ``download`` is false.
+
+    Raises:
+        ValueError: If neither `model_index.json` nor `config.json` could be
+            downloaded.
+    """
+
+    if os.path.exists(model_name_or_path):
+        logger.info("Model already exists locally")
+        return model_name_or_path
+
+    if not download:
+        return model_name_or_path
+
+    # Hold the per-model lock for the whole download attempt.
+    with _get_lock(model_name_or_path):
+        # Try `model_index.json` first (diffusers models)
+        try:
+            logger.info(
+                "Downloading model_index.json from HF Hub for %s...",
+                model_name_or_path,
+            )
+            file_path = hf_hub_download(
+                repo_id=model_name_or_path,
+                filename="model_index.json",
+                local_dir=local_dir,
+            )
+            logger.info("Downloaded to %s", file_path)
+            return os.path.dirname(file_path)
+        except Exception as e_index:
+            # Deliberate best effort: any failure here falls through to the
+            # `config.json` attempt below.
+            logger.debug("model_index.json not found or failed: %s", e_index)
+
+        # Fallback to `config.json`
+        try:
+            logger.info(
+                "Downloading config.json from HF Hub for %s...", model_name_or_path
+            )
+            file_path = hf_hub_download(
+                repo_id=model_name_or_path,
+                filename="config.json",
+                local_dir=local_dir,
+            )
+            logger.info("Downloaded to %s", file_path)
+            return os.path.dirname(file_path)
+        except Exception as e_config:
+            raise ValueError(
+                (
+                    "Could not find model locally at %s and failed to download "
+                    "model_index.json/config.json from HF Hub: %s"
+                )
+                % (model_name_or_path, e_config)
+            ) from e_config
+
+
+# Copied and adapted from hf_diffusers_utils.py
+def _verify_model_config_and_directory(model_path: str) -> Optional[bool]:
+    """
+    Check whether a model directory holds a diffusers pipeline configuration.
+
+    Args:
+        model_path: Path to the model directory
+
+    Returns:
+        True if `model_index.json` exists in the directory and contains a
+        `_diffusers_version` entry.
+        None if the file is missing or has no `_diffusers_version` entry.
+    """
+
+    # Prefer model_index.json which indicates a diffusers pipeline
+    config_path = os.path.join(model_path, "model_index.json")
+    if not os.path.exists(config_path):
+        return None
+
+    # Load the config; JSON is read as UTF-8 regardless of the locale default.
+    with open(config_path, encoding="utf-8") as f:
+        config = json.load(f)
+
+    # Verify diffusers version exists
+    if "_diffusers_version" not in config:
+        return None
+    return True
+
+
+def get_is_diffusion_model(model_path: str):
+    """Tell whether ``model_path`` points at a diffusers-style model.
+
+    The path is first resolved through ``_maybe_download_model`` and the
+    resulting directory is then checked by
+    ``_verify_model_config_and_directory``, whose result is returned as-is.
+    """
+    resolved_dir = _maybe_download_model(model_path)
+    return _verify_model_config_and_directory(resolved_dir)
+
+
+def get_model_path(extra_argv):
+    """Extract the value of ``--model-path`` from a raw argument list.
+
+    Both ``--model-path VALUE`` and ``--model-path=VALUE`` are recognised; the
+    first usable occurrence wins. A trailing ``--model-path`` with no value
+    after it is skipped.
+
+    Args:
+        extra_argv: List of command-line arguments.
+
+    Returns:
+        The model path string.
+
+    Raises:
+        Exception: With a usage message if no model path is present and
+            ``-h``/``--help`` was passed, otherwise with a "required" message.
+    """
+    flag = "--model-path"
+    flag_with_equals = flag + "="
+    total = len(extra_argv)
+
+    found = None
+    for position in range(total):
+        token = extra_argv[position]
+        if token == flag:
+            # Space-separated form: the value is the following argument.
+            if position + 1 < total:
+                found = extra_argv[position + 1]
+                break
+        elif token.startswith(flag_with_equals):
+            # Inline form: everything after the first "=" is the value.
+            found = token[len(flag_with_equals):]
+            break
+
+    if found is not None:
+        return found
+
+    # Fallback for --help or other cases where model-path is not provided
+    help_requested = "-h" in extra_argv or "--help" in extra_argv
+    if help_requested:
+        raise Exception(
+            "Usage: sglang serve --model-path <model-name-or-path> [additional-arguments]\n\n"
+            "This command can launch either a standard language model server or a diffusion model server.\n"
+            "The server type is determined by the model path.\n"
+            "For specific arguments, please provide a model_path."
+        )
+    raise Exception(
+        "Error: --model-path is required. "
+        "Please provide the path to the model."
+    )
+
+
+def main():
+    """Command-line entry point: dispatch to the ``serve`` or ``generate`` handler.
+
+    The top-level parser only identifies the subcommand. Every remaining
+    argument is passed through untouched to the handler, which builds its own
+    parser; that is why the subparsers are created with ``add_help=False``.
+    """
+    root_parser = argparse.ArgumentParser()
+    commands = root_parser.add_subparsers(dest="subcommand", required=True)
+
+    # serve subcommand (handler imported here rather than at module level)
+    from sglang.cli.serve import serve
+
+    commands.add_parser(
+        "serve",
+        help="Launch the SGLang server.",
+        add_help=False,  # Defer help to the specific parser
+    ).set_defaults(func=serve)
+
+    # generate subcommand (handler imported here rather than at module level)
+    from sglang.cli.generate import generate
+
+    commands.add_parser(
+        "generate",
+        help="Run inference on a multimodal model.",
+        add_help=False,  # Defer help to the specific parser
+    ).set_defaults(func=generate)
+
+    namespace, passthrough = root_parser.parse_known_args()
+    namespace.func(namespace, passthrough)