Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ jobs:
strategy:
fail-fast: false
matrix:
partition: [0, 1]
partition: [0, 1, 2]
steps:
- name: Checkout code
uses: actions/checkout@v4
Expand All @@ -446,7 +446,44 @@ jobs:
timeout-minutes: 30
run: |
cd test/
python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 2
python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 3

stage-b-test-2-gpu:
needs: [check-changes, call-gate, stage-a-test-1, sgl-kernel-build-wheels]
if: |
always() &&
(
(inputs.target_stage == 'stage-b-test-2-gpu') ||
(
!inputs.target_stage &&
(github.event_name == 'schedule' || (!failure() && !cancelled())) &&
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
)
)
runs-on: 2-gpu-runner
env:
RUNNER_LABELS: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh

- name: Run test
timeout-minutes: 30
run: |
cd test/
python3 run_suite.py --hw cuda --suite stage-b-test-small-2-gpu

multimodal-gen-test-1-gpu:
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
Expand Down Expand Up @@ -1326,6 +1363,7 @@ jobs:

stage-a-test-1,
stage-b-test-small-1-gpu,
stage-b-test-2-gpu,
quantization-test,
unit-test-backend-1-gpu,
unit-test-backend-2-gpu,
Expand Down
1 change: 1 addition & 0 deletions python/sglang/test/ci/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""CI utilities for SGLang test infrastructure."""
File renamed without changes.
1 change: 1 addition & 0 deletions scripts/ci/slash_command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def handle_rerun_stage(
nvidia_stages = [
"stage-a-test-1",
"stage-b-test-small-1-gpu",
"stage-b-test-2-gpu",
"multimodal-gen-test-1-gpu",
"multimodal-gen-test-2-gpu",
"quantization-test",
Expand Down
12 changes: 4 additions & 8 deletions test/srt/lora/test_lora.py → test/registered/lora/test_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,18 @@

import multiprocessing as mp
import os
import sys
import unittest
from pathlib import Path

# Add test directory to path for lora_utils import
# TODO: can be removed after migration
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))

from lora_utils import (
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.lora_utils import (
ALL_OTHER_MULTI_LORA_MODELS,
CI_MULTI_LORA_MODELS,
run_lora_multiple_batch_on_model_cases,
)

from sglang.test.test_utils import CustomTestCase, is_in_ci

register_cuda_ci(est_time=82, suite="stage-b-test-small-1-gpu")


class TestLoRA(CustomTestCase):
def test_ci_lora_models(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,11 @@

import multiprocessing as mp
import os
import sys
import unittest
from pathlib import Path
from typing import List

# Add test directory to path for lora_utils import
# TODO: can be removed after migration
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))

from lora_utils import (
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.lora_utils import (
ALL_OTHER_LORA_MODELS,
BACKENDS,
CI_LORA_MODELS,
Expand All @@ -32,9 +27,10 @@
LoRAModelCase,
run_lora_test_one_by_one,
)

from sglang.test.test_utils import CustomTestCase, is_in_ci

register_cuda_ci(est_time=200, suite="stage-b-test-small-1-gpu")


class TestLoRABackend(CustomTestCase):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@

import torch

from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.runners import SRTRunner
from sglang.test.test_utils import CustomTestCase

register_cuda_ci(est_time=224, suite="stage-b-test-small-1-gpu")

PROMPTS = [
"AI is a field of computer science focused on",
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,24 @@
"""

import multiprocessing as mp
import os
import sys
import unittest
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import torch

# Add sglang to path if needed
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../python"))

from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.runners import HFRunner, SRTRunner

register_cuda_ci(est_time=300, suite="nightly-1-gpu", nightly=True)

from sglang.test.test_utils import (
DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
CustomTestCase,
is_in_ci,
register_cuda_ci(
est_time=300,
suite="nightly-1-gpu",
nightly=True,
disabled="Temporarily disabled, will be fixed later",
)

from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER, CustomTestCase

# Test configuration constants
LORA_BACKEND = "triton"
DISABLE_CUDA_GRAPH = False
Expand Down Expand Up @@ -510,10 +506,6 @@ def test_lora_logprob_comparison_basic(self):
"""
Basic test comparing HF and SGLang LoRA logprobs with small model.
"""
# Use a smaller model and shorter prompts for CI
if is_in_ci():
self.skipTest("Skipping in CI environment - requires large models")

model_path = "meta-llama/Llama-2-7b-hf"
lora_paths = ["yushengsu/sglang_lora_logprob_diff_without_tuning"]
prompts = DEFAULT_TEST_PROMPTS[:2] # Use fewer prompts for faster testing
Expand All @@ -529,9 +521,6 @@ def test_lora_logprob_comparison_full(self):
"""
Full test comparing HF and SGLang LoRA logprobs with all prompts.
"""
if is_in_ci():
self.skipTest("Skipping in CI environment - requires large models")

model_path = "meta-llama/Llama-2-7b-hf"
lora_paths = ["yushengsu/sglang_lora_logprob_diff_without_tuning"]
prompts = DEFAULT_TEST_PROMPTS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,15 @@
# ==============================================================================

import multiprocessing as mp
import sys
import unittest
from pathlib import Path

# Add test directory to path for lora_utils import
# TODO: can be removed after migration
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from lora_utils import (
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.lora_utils import (
LoRAAdaptor,
LoRAModelCase,
run_lora_multiple_batch_on_model_cases,
)

from sglang.test.ci.ci_register import register_cuda_ci

register_cuda_ci(est_time=97, suite="nightly-1-gpu", nightly=True)

from sglang.test.test_utils import CustomTestCase
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,12 @@
# ==============================================================================

import multiprocessing as mp
import sys
import unittest
from pathlib import Path

import torch

# Add test directory to path for lora_utils import
# TODO: can be removed after migration
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from lora_utils import CI_MULTI_LORA_MODELS, run_lora_test_one_by_one

from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.lora_utils import CI_MULTI_LORA_MODELS, run_lora_test_one_by_one

register_cuda_ci(est_time=200, suite="nightly-1-gpu", nightly=True)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,22 @@

import multiprocessing as mp
import os
import sys
import unittest
from pathlib import Path
from typing import List

# Add test directory to path for lora_utils import
# TODO: can be removed after migration
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))

from lora_utils import (
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.lora_utils import (
ALL_OTHER_LORA_MODELS,
CI_LORA_MODELS,
DEFAULT_PROMPTS,
TORCH_DTYPES,
LoRAModelCase,
run_lora_test_one_by_one,
)

from sglang.test.test_utils import CustomTestCase, is_in_ci

register_cuda_ci(est_time=116, suite="stage-b-test-small-2-gpu")


class TestLoRATP(CustomTestCase):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import torch

from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.runners import SRTRunner
from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
Expand All @@ -33,6 +34,8 @@
popen_launch_server,
)

register_cuda_ci(est_time=451, suite="stage-b-test-small-1-gpu")

PROMPTS = [
"SGL is a",
"AI is a field of computer science focused on",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,18 @@

import multiprocessing as mp
import os
import sys
import unittest
from pathlib import Path

# Add test directory to path for lora_utils import
# TODO: can be removed after migration
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))

from lora_utils import (
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.lora_utils import (
ALL_OTHER_MULTI_LORA_MODELS,
CI_MULTI_LORA_MODELS,
run_lora_multiple_batch_on_model_cases,
)

from sglang.test.test_utils import CustomTestCase, is_in_ci

register_cuda_ci(est_time=60, suite="stage-b-test-small-1-gpu")

# All prompts are used at once in a batch.
PROMPTS = [
"AI is a field of computer science focused on",
Expand Down
6 changes: 5 additions & 1 deletion test/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
PER_COMMIT_SUITES = {
HWBackend.CPU: ["default"],
HWBackend.AMD: ["stage-a-test-1"],
HWBackend.CUDA: ["stage-a-test-1", "stage-b-test-small-1-gpu"],
HWBackend.CUDA: [
"stage-a-test-1",
"stage-b-test-small-1-gpu",
"stage-b-test-small-2-gpu",
],
HWBackend.NPU: [],
}

Expand Down
14 changes: 2 additions & 12 deletions test/srt/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@
TestFile("layers/attention/mamba/test_causal_conv1d.py", 25),
TestFile("layers/attention/mamba/test_mamba_ssm.py", 7),
TestFile("layers/attention/mamba/test_mamba_ssm_ssd.py", 13),
TestFile("lora/test_lora.py", 82),
TestFile("lora/test_lora_eviction.py", 224),
TestFile("lora/test_lora_update.py", 451),
TestFile("lora/test_lora_backend.py", 200),
TestFile("lora/test_multi_lora_backend.py", 60),
TestFile("models/test_compressed_tensors_models.py", 42),
TestFile("models/test_cross_encoder_models.py", 100),
TestFile("models/test_embedding_models.py", 73),
Expand Down Expand Up @@ -133,7 +128,6 @@
TestFile("hicache/test_hicache_storage_file_backend.py", 200),
TestFile("hicache/test_hicache_storage_mooncake_backend.py", 300),
TestFile("layers/attention/mamba/test_mamba2_mixer.py", 50),
TestFile("lora/test_lora_tp.py", 116),
TestFile("models/test_glm4_moe_models.py", 100),
TestFile("models/test_kimi_linear_models.py", 90),
TestFile("rl/test_update_weights_from_distributed.py", 103),
Expand Down Expand Up @@ -201,10 +195,8 @@
TestFile("test_quantization.py", 185),
TestFile("test_gguf.py", 96),
],
# Nightly test suites have been moved to test/run_suite_nightly.py
"__not_in_ci__": [
TestFile("test_release_memory_occupation.py", 200), # Temporarily disabled
TestFile("lora/test_lora_hf_sgl_logprob_diff.py"), # Nightly test
TestFile("models/test_dummy_grok_models.py"),
TestFile(
"rl/test_update_weights_from_disk.py"
Expand All @@ -228,12 +220,10 @@
# TestFile("hicache/test_hicache.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575
# TestFile("hicache/test_hicache_mla.py", 127), # Disabled temporarily, # Temporarily disabled, see https://github.com/sgl-project/sglang/issues/12574
# TestFile("hicache/test_hicache_storage.py", 127), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575
TestFile("lora/test_lora.py", 665),
# LoRA tests moved to test/registered/lora/ - AMD entries need to be re-added there
# TestFile("lora/test_lora_backend.py", 99), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107
# TestFile("lora/test_lora_cuda_graph.py", 250), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107
TestFile("lora/test_lora_eviction.py", 240),
# TestFile("lora/test_lora_qwen3.py", 97), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107
TestFile("lora/test_multi_lora_backend.py", 60),
TestFile("models/test_compressed_tensors_models.py", 42),
TestFile("models/test_qwen_models.py", 82),
TestFile("models/test_reward_models.py", 132),
Expand Down Expand Up @@ -308,7 +298,7 @@
TestFile("test_mla.py", 242),
],
"per-commit-2-gpu-amd": [
# TestFile("lora/test_lora_tp.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107
# TestFile("lora/test_lora_tp.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107. Moved to test/registered/lora/
TestFile("rl/test_update_weights_from_distributed.py", 103),
TestFile("test_data_parallelism.py", 73),
TestFile("test_load_weights_from_remote_instance.py", 72),
Expand Down
Loading