Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,12 @@ RUN --mount=type=cache,target=/root/.cache/pip \
fi && \
TORCH_CUDA_ARCH_LIST="${CHOSEN_TORCH_CUDA_ARCH_LIST}" MAX_JOBS=${BUILD_AND_DOWNLOAD_PARALLEL} pip install --no-build-isolation .

# Install flash_attn if it is not already present
# flash_attn is required by transformers 5.x
RUN --mount=type=cache,target=/root/.cache/pip \
python3 -c "import flash_attn" 2>/dev/null \
|| python3 -m pip install flash_attn --no-build-isolation

# Install essential Python packages
RUN --mount=type=cache,target=/root/.cache/pip \
python3 -m pip install \
Expand Down
4 changes: 2 additions & 2 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ dependencies = [
"torch_memory_saver==0.0.9",
"torch==2.9.1",
"torchaudio==2.9.1",
"torchcodec==0.7.0 ; sys_platform != 'linux' or (sys_platform == 'linux' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')", # torchcodec does not exist in those systems. If not provided, transformer will use torchvision instead by default.
"torchcodec==0.8.0 ; sys_platform != 'linux' or (sys_platform == 'linux' and platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l')", # torchcodec is not available on those platforms. If it is not installed, transformers falls back to torchvision by default.
"torchvision",
"torchao==0.9.0",
"tqdm",
"transformers==4.57.1",
"transformers==5.0.0rc0",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_cpu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ dependencies = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==4.57.1",
"transformers==5.0.0rc0",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_other.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ runtime_common = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==4.57.1",
"transformers==5.0.0rc0",
"uvicorn",
"uvloop",
"xgrammar==0.1.27",
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject_xpu.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ dependencies = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==4.57.1",
"transformers==5.0.0rc0",
"uvicorn",
"uvloop",
# "xgrammar==0.1.24",  # excluded: xgrammar depends on CUDA PyTorch and Triton only
Expand Down
4 changes: 0 additions & 4 deletions python/sglang/srt/configs/qwen3_omni.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from transformers import PretrainedConfig
from transformers.configuration_utils import layer_type_validation
from transformers.modeling_rope_utils import rope_config_validation

from sglang.utils import logger

Expand Down Expand Up @@ -168,7 +167,6 @@ def __init__(
# BC: if there is a 'type' field, move it to 'rope_type'.
if self.rope_scaling is not None and "type" in self.rope_scaling:
self.rope_scaling["rope_type"] = self.rope_scaling["type"]
rope_config_validation(self)

# MoE arguments
self.decoder_sparse_step = decoder_sparse_step
Expand Down Expand Up @@ -311,7 +309,6 @@ def __init__(
# BC: if there is a 'type' field, move it to 'rope_type'.
if self.rope_scaling is not None and "type" in self.rope_scaling:
self.rope_scaling["rope_type"] = self.rope_scaling["type"]
rope_config_validation(self)

self.layer_types = layer_types
if self.layer_types is None:
Expand Down Expand Up @@ -405,7 +402,6 @@ def __init__(
# BC: if there is a 'type' field, move it to 'rope_type'.
if self.rope_scaling is not None and "type" in self.rope_scaling:
self.rope_scaling["rope_type"] = self.rope_scaling["type"]
rope_config_validation(self)

# MoE arguments
self.decoder_sparse_step = decoder_sparse_step
Expand Down
7 changes: 4 additions & 3 deletions python/sglang/srt/configs/qwen3_vl.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from transformers import PretrainedConfig
from transformers.modeling_rope_utils import rope_config_validation

# from transformers.modeling_rope_utils import rope_config_validation


class Qwen3VLVisionConfig(PretrainedConfig):
Expand Down Expand Up @@ -187,7 +188,7 @@ def __init__(
self.attention_bias = attention_bias
self.attention_dropout = attention_dropout

rope_config_validation(self, ignore_keys={"mrope_section", "mrope_interleaved"})
# rope_config_validation(self, ignore_keys={"mrope_section", "mrope_interleaved"})

super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)

Expand Down Expand Up @@ -450,7 +451,7 @@ def __init__(
self.rope_scaling = rope_scaling
self.head_dim = head_dim or hidden_size // num_attention_heads

rope_config_validation(self, ignore_keys={"mrope_section", "mrope_interleaved"})
# rope_config_validation(self, ignore_keys={"mrope_section", "mrope_interleaved"})

# MoE arguments
self.decoder_sparse_step = decoder_sparse_step
Expand Down
15 changes: 9 additions & 6 deletions scripts/ci/ci_install_dependency.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then

# Clean up existing installations
$PIP_CMD uninstall -y sgl-kernel sglang $PIP_INSTALL_SUFFIX || true
$PIP_CMD uninstall -y flash_attn || true
$PIP_CMD uninstall -y flashinfer-python flashinfer-cubin flashinfer-jit-cache $PIP_INSTALL_SUFFIX || true
else
# In normal cases, we use uv, which is much faster than pip.
Expand All @@ -91,9 +92,16 @@ else

# Clean up existing installations
$PIP_CMD uninstall sgl-kernel sglang || true
$PIP_CMD uninstall flash_attn || true
$PIP_CMD uninstall flashinfer-python flashinfer-cubin flashinfer-jit-cache || true
fi

if [ "$IS_BLACKWELL" != "1" ]; then
# For lmms_evals evaluating MMMU
git clone --branch v0.5 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
$PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX
fi

EXTRAS="dev"
if [ -n "$OPTIONAL_DEPS" ]; then
EXTRAS="dev,${OPTIONAL_DEPS}"
Expand Down Expand Up @@ -129,17 +137,12 @@ $PIP_CMD list

$PIP_CMD install mooncake-transfer-engine==0.3.7.post2 "${NVRTC_SPEC}" py-spy scipy huggingface_hub[hf_xet] pytest $PIP_INSTALL_SUFFIX

if [ "$IS_BLACKWELL" != "1" ]; then
# For lmms_evals evaluating MMMU
git clone --branch v0.5 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
$PIP_CMD install -e lmms-eval/ $PIP_INSTALL_SUFFIX
fi

# DeepEP depends on nvshmem 3.4.5
$PIP_CMD install nvidia-nvshmem-cu12==3.4.5 --force-reinstall $PIP_INSTALL_SUFFIX

# Cudnn with version less than 9.16.0.29 will cause performance regression on Conv3D kernel
$PIP_CMD install nvidia-cudnn-cu12==9.16.0.29 --force-reinstall $PIP_INSTALL_SUFFIX

$PIP_CMD uninstall xformers || true
# Show current packages
$PIP_CMD list
Expand Down
Loading