diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 9f7b8140b6bd..418260e1e399 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -425,7 +425,7 @@ jobs: strategy: fail-fast: false matrix: - partition: [0, 1] + partition: [0, 1, 2] steps: - name: Checkout code uses: actions/checkout@v4 @@ -446,7 +446,44 @@ jobs: timeout-minutes: 30 run: | cd test/ - python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 2 + python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 3 + + stage-b-test-2-gpu: + needs: [check-changes, call-gate, stage-a-test-1, sgl-kernel-build-wheels] + if: | + always() && + ( + (inputs.target_stage == 'stage-b-test-2-gpu') || + ( + !inputs.target_stage && + (github.event_name == 'schedule' || (!failure() && !cancelled())) && + ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) + ) + ) + runs-on: 2-gpu-runner + env: + RUNNER_LABELS: 2-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Download artifacts + if: needs.check-changes.outputs.sgl_kernel == 'true' + uses: actions/download-artifact@v4 + with: + path: sgl-kernel/dist/ + merge-multiple: true + pattern: wheel-python3.10-cuda12.9 + + - name: Install dependencies + run: | + CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh + + - name: Run test + timeout-minutes: 30 + run: | + cd test/ + python3 run_suite.py --hw cuda --suite stage-b-test-small-2-gpu multimodal-gen-test-1-gpu: needs: [check-changes, call-gate, sgl-kernel-build-wheels] @@ -1326,6 +1363,7 @@ jobs: stage-a-test-1, stage-b-test-small-1-gpu, + stage-b-test-2-gpu, quantization-test, unit-test-backend-1-gpu, unit-test-backend-2-gpu, diff --git a/python/sglang/test/ci/__init__.py b/python/sglang/test/ci/__init__.py new file mode 100644 index 000000000000..595f4b79d0d0 --- /dev/null +++ b/python/sglang/test/ci/__init__.py @@ -0,0 +1 @@ +"""CI utilities for SGLang test infrastructure.""" diff --git a/test/lora_utils.py b/python/sglang/test/lora_utils.py similarity index 100% rename from test/lora_utils.py rename to python/sglang/test/lora_utils.py diff --git a/scripts/ci/slash_command_handler.py b/scripts/ci/slash_command_handler.py index fa3bb8557ada..f43505d0f97e 100644 --- a/scripts/ci/slash_command_handler.py +++ b/scripts/ci/slash_command_handler.py @@ -144,6 +144,7 @@ def handle_rerun_stage( nvidia_stages = [ "stage-a-test-1", "stage-b-test-small-1-gpu", + "stage-b-test-2-gpu", "multimodal-gen-test-1-gpu", "multimodal-gen-test-2-gpu", "quantization-test", diff --git a/test/srt/lora/test_lora.py b/test/registered/lora/test_lora.py similarity index 87% rename from test/srt/lora/test_lora.py rename to test/registered/lora/test_lora.py index 3dc08d2f16f4..6d5c2b340547 100644 --- a/test/srt/lora/test_lora.py +++ b/test/registered/lora/test_lora.py @@ -14,22 +14,18 @@ import multiprocessing as mp import os -import sys import unittest -from pathlib import Path -# Add test directory to path for lora_utils import -# TODO: can be removed after migration -sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) - -from lora_utils import ( +from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.lora_utils import ( ALL_OTHER_MULTI_LORA_MODELS, CI_MULTI_LORA_MODELS, run_lora_multiple_batch_on_model_cases, ) - from sglang.test.test_utils import CustomTestCase, is_in_ci +register_cuda_ci(est_time=82, suite="stage-b-test-small-1-gpu") + class TestLoRA(CustomTestCase): def test_ci_lora_models(self): diff --git a/test/srt/lora/test_lora_backend.py b/test/registered/lora/test_lora_backend.py similarity index 91% rename from test/srt/lora/test_lora_backend.py rename to test/registered/lora/test_lora_backend.py index bf9d5b75ca32..e8f9134f656e 100644 --- a/test/srt/lora/test_lora_backend.py +++ b/test/registered/lora/test_lora_backend.py @@ -14,16 +14,11 @@ import multiprocessing as mp import os -import sys import unittest -from pathlib import Path from typing import List -# Add test directory to path for lora_utils import -# TODO: can be removed after migration -sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) - -from lora_utils import ( +from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.lora_utils import ( ALL_OTHER_LORA_MODELS, BACKENDS, CI_LORA_MODELS, @@ -32,9 +27,10 @@ LoRAModelCase, run_lora_test_one_by_one, ) - from sglang.test.test_utils import CustomTestCase, is_in_ci +register_cuda_ci(est_time=200, suite="stage-b-test-small-1-gpu") + class TestLoRABackend(CustomTestCase): diff --git a/test/srt/lora/test_lora_eviction.py b/test/registered/lora/test_lora_eviction.py similarity index 98% rename from test/srt/lora/test_lora_eviction.py rename to test/registered/lora/test_lora_eviction.py index 78cdd8282fe0..7d9fb6f2e5aa 100644 --- a/test/srt/lora/test_lora_eviction.py +++ b/test/registered/lora/test_lora_eviction.py @@ -19,9 +19,12 @@ import torch +from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.runners import SRTRunner from sglang.test.test_utils import CustomTestCase +register_cuda_ci(est_time=224, suite="stage-b-test-small-1-gpu") + PROMPTS = [ "AI is a field of computer science focused on", """ diff --git a/test/nightly/test_lora_eviction_policy.py b/test/registered/lora/test_lora_eviction_policy.py similarity index 100% rename from test/nightly/test_lora_eviction_policy.py rename to test/registered/lora/test_lora_eviction_policy.py diff --git a/test/srt/lora/test_lora_hf_sgl_logprob_diff.py b/test/registered/lora/test_lora_hf_sgl_logprob_diff.py similarity index 96% rename from test/srt/lora/test_lora_hf_sgl_logprob_diff.py rename to test/registered/lora/test_lora_hf_sgl_logprob_diff.py index b0975fa5d666..10c03e68a814 100644 --- a/test/srt/lora/test_lora_hf_sgl_logprob_diff.py +++ b/test/registered/lora/test_lora_hf_sgl_logprob_diff.py @@ -28,28 +28,24 @@ """ import multiprocessing as mp -import os -import sys import unittest from typing import Any, Dict, List, Optional, Tuple import numpy as np import torch -# Add sglang to path if needed -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../python")) - from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.runners import HFRunner, SRTRunner -register_cuda_ci(est_time=300, suite="nightly-1-gpu", nightly=True) - -from sglang.test.test_utils import ( - DEFAULT_PORT_FOR_SRT_TEST_RUNNER, - CustomTestCase, - is_in_ci, +register_cuda_ci( + est_time=300, + suite="nightly-1-gpu", + nightly=True, + disabled="Temporarily disabled, will be fixed later", ) +from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER, CustomTestCase + # Test configuration constants LORA_BACKEND = "triton" DISABLE_CUDA_GRAPH = False @@ -510,10 +506,6 @@ def test_lora_logprob_comparison_basic(self): """ Basic test comparing HF and SGLang LoRA logprobs with small model. """ - # Use a smaller model and shorter prompts for CI - if is_in_ci(): - self.skipTest("Skipping in CI environment - requires large models") - model_path = "meta-llama/Llama-2-7b-hf" lora_paths = ["yushengsu/sglang_lora_logprob_diff_without_tuning"] prompts = DEFAULT_TEST_PROMPTS[:2] # Use fewer prompts for faster testing @@ -529,9 +521,6 @@ def test_lora_logprob_comparison_full(self): """ Full test comparing HF and SGLang LoRA logprobs with all prompts. """ - if is_in_ci(): - self.skipTest("Skipping in CI environment - requires large models") - model_path = "meta-llama/Llama-2-7b-hf" lora_paths = ["yushengsu/sglang_lora_logprob_diff_without_tuning"] prompts = DEFAULT_TEST_PROMPTS diff --git a/test/nightly/test_lora_openai_api.py b/test/registered/lora/test_lora_openai_api.py similarity index 100% rename from test/nightly/test_lora_openai_api.py rename to test/registered/lora/test_lora_openai_api.py diff --git a/test/nightly/test_lora_openai_compatible.py b/test/registered/lora/test_lora_openai_compatible.py similarity index 100% rename from test/nightly/test_lora_openai_compatible.py rename to test/registered/lora/test_lora_openai_compatible.py diff --git a/test/nightly/test_lora_qwen3.py b/test/registered/lora/test_lora_qwen3.py similarity index 88% rename from test/nightly/test_lora_qwen3.py rename to test/registered/lora/test_lora_qwen3.py index 39d5a755efcf..da6999f422c8 100644 --- a/test/nightly/test_lora_qwen3.py +++ b/test/registered/lora/test_lora_qwen3.py @@ -13,22 +13,15 @@ # ============================================================================== import multiprocessing as mp -import sys import unittest -from pathlib import Path -# Add test directory to path for lora_utils import -# TODO: can be removed after migration -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) - -from lora_utils import ( +from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.lora_utils import ( LoRAAdaptor, LoRAModelCase, run_lora_multiple_batch_on_model_cases, ) -from sglang.test.ci.ci_register import register_cuda_ci - register_cuda_ci(est_time=97, suite="nightly-1-gpu", nightly=True) from sglang.test.test_utils import CustomTestCase diff --git a/test/nightly/test_lora_radix_cache.py b/test/registered/lora/test_lora_radix_cache.py similarity index 90% rename from test/nightly/test_lora_radix_cache.py rename to test/registered/lora/test_lora_radix_cache.py index fee9bfce1d15..84ffd2cceda6 100644 --- a/test/nightly/test_lora_radix_cache.py +++ b/test/registered/lora/test_lora_radix_cache.py @@ -13,19 +13,12 @@ # ============================================================================== import multiprocessing as mp -import sys import unittest -from pathlib import Path import torch -# Add test directory to path for lora_utils import -# TODO: can be removed after migration -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) - -from lora_utils import CI_MULTI_LORA_MODELS, run_lora_test_one_by_one - from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.lora_utils import CI_MULTI_LORA_MODELS, run_lora_test_one_by_one register_cuda_ci(est_time=200, suite="nightly-1-gpu", nightly=True) diff --git a/test/srt/lora/test_lora_tp.py b/test/registered/lora/test_lora_tp.py similarity index 91% rename from test/srt/lora/test_lora_tp.py rename to test/registered/lora/test_lora_tp.py index 8e7b23f62dbf..c90979c9a3ac 100644 --- a/test/srt/lora/test_lora_tp.py +++ b/test/registered/lora/test_lora_tp.py @@ -14,16 +14,11 @@ import multiprocessing as mp import os -import sys import unittest -from pathlib import Path from typing import List -# Add test directory to path for lora_utils import -# TODO: can be removed after migration -sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) - -from lora_utils import ( +from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.lora_utils import ( ALL_OTHER_LORA_MODELS, CI_LORA_MODELS, DEFAULT_PROMPTS, @@ -31,9 +26,10 @@ LoRAModelCase, run_lora_test_one_by_one, ) - from sglang.test.test_utils import CustomTestCase, is_in_ci +register_cuda_ci(est_time=116, suite="stage-b-test-small-2-gpu") + class TestLoRATP(CustomTestCase): diff --git a/test/srt/lora/test_lora_update.py b/test/registered/lora/test_lora_update.py similarity index 99% rename from test/srt/lora/test_lora_update.py rename to test/registered/lora/test_lora_update.py index 9c3f0855033b..957c1d7cefcc 100644 --- a/test/srt/lora/test_lora_update.py +++ b/test/registered/lora/test_lora_update.py @@ -23,6 +23,7 @@ import torch from sglang.srt.utils import kill_process_tree +from sglang.test.ci.ci_register import register_cuda_ci from sglang.test.runners import SRTRunner from sglang.test.test_utils import ( DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, @@ -33,6 +34,8 @@ popen_launch_server, ) +register_cuda_ci(est_time=451, suite="stage-b-test-small-1-gpu") + PROMPTS = [ "SGL is a", "AI is a field of computer science focused on", diff --git a/test/srt/lora/test_multi_lora_backend.py b/test/registered/lora/test_multi_lora_backend.py similarity index 90% rename from test/srt/lora/test_multi_lora_backend.py rename to test/registered/lora/test_multi_lora_backend.py index 84def4813a9d..58dfdc16ca36 100644 --- a/test/srt/lora/test_multi_lora_backend.py +++ b/test/registered/lora/test_multi_lora_backend.py @@ -14,22 +14,18 @@ import multiprocessing as mp import os -import sys import unittest -from pathlib import Path -# Add test directory to path for lora_utils import -# TODO: can be removed after migration -sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) - -from lora_utils import ( +from sglang.test.ci.ci_register import register_cuda_ci +from sglang.test.lora_utils import ( ALL_OTHER_MULTI_LORA_MODELS, CI_MULTI_LORA_MODELS, run_lora_multiple_batch_on_model_cases, ) - from sglang.test.test_utils import CustomTestCase, is_in_ci +register_cuda_ci(est_time=60, suite="stage-b-test-small-1-gpu") + # All prompts are used at once in a batch. PROMPTS = [ "AI is a field of computer science focused on", diff --git a/test/run_suite.py b/test/run_suite.py index 32a5fd744cd6..ed521752dd9c 100644 --- a/test/run_suite.py +++ b/test/run_suite.py @@ -19,7 +19,11 @@ PER_COMMIT_SUITES = { HWBackend.CPU: ["default"], HWBackend.AMD: ["stage-a-test-1"], - HWBackend.CUDA: ["stage-a-test-1", "stage-b-test-small-1-gpu"], + HWBackend.CUDA: [ + "stage-a-test-1", + "stage-b-test-small-1-gpu", + "stage-b-test-small-2-gpu", + ], HWBackend.NPU: [], } diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 91c224e89459..81b5d3ed5a60 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -13,11 +13,6 @@ TestFile("layers/attention/mamba/test_causal_conv1d.py", 25), TestFile("layers/attention/mamba/test_mamba_ssm.py", 7), TestFile("layers/attention/mamba/test_mamba_ssm_ssd.py", 13), - TestFile("lora/test_lora.py", 82), - TestFile("lora/test_lora_eviction.py", 224), - TestFile("lora/test_lora_update.py", 451), - TestFile("lora/test_lora_backend.py", 200), - TestFile("lora/test_multi_lora_backend.py", 60), TestFile("models/test_compressed_tensors_models.py", 42), TestFile("models/test_cross_encoder_models.py", 100), TestFile("models/test_embedding_models.py", 73), @@ -133,7 +128,6 @@ TestFile("hicache/test_hicache_storage_file_backend.py", 200), TestFile("hicache/test_hicache_storage_mooncake_backend.py", 300), TestFile("layers/attention/mamba/test_mamba2_mixer.py", 50), - TestFile("lora/test_lora_tp.py", 116), TestFile("models/test_glm4_moe_models.py", 100), TestFile("models/test_kimi_linear_models.py", 90), TestFile("rl/test_update_weights_from_distributed.py", 103), @@ -201,10 +195,8 @@ TestFile("test_quantization.py", 185), TestFile("test_gguf.py", 96), ], - # Nightly test suites have been moved to test/run_suite_nightly.py "__not_in_ci__": [ TestFile("test_release_memory_occupation.py", 200), # Temporarily disabled - TestFile("lora/test_lora_hf_sgl_logprob_diff.py"), # Nightly test TestFile("models/test_dummy_grok_models.py"), TestFile( "rl/test_update_weights_from_disk.py" @@ -228,12 +220,10 @@ # TestFile("hicache/test_hicache.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575 # TestFile("hicache/test_hicache_mla.py", 127), # Disabled temporarily, # Temporarily disabled, see https://github.com/sgl-project/sglang/issues/12574 # TestFile("hicache/test_hicache_storage.py", 127), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/12575 - TestFile("lora/test_lora.py", 665), + # LoRA tests moved to test/registered/lora/ - AMD entries need to be re-added there # TestFile("lora/test_lora_backend.py", 99), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107 # TestFile("lora/test_lora_cuda_graph.py", 250), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107 - TestFile("lora/test_lora_eviction.py", 240), # TestFile("lora/test_lora_qwen3.py", 97), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107 - TestFile("lora/test_multi_lora_backend.py", 60), TestFile("models/test_compressed_tensors_models.py", 42), TestFile("models/test_qwen_models.py", 82), TestFile("models/test_reward_models.py", 132), @@ -308,7 +298,7 @@ TestFile("test_mla.py", 242), ], "per-commit-2-gpu-amd": [ - # TestFile("lora/test_lora_tp.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107 + # TestFile("lora/test_lora_tp.py", 116), # Disabled temporarily, see https://github.com/sgl-project/sglang/issues/13107. Moved to test/registered/lora/ TestFile("rl/test_update_weights_from_distributed.py", 103), TestFile("test_data_parallelism.py", 73), TestFile("test_load_weights_from_remote_instance.py", 72),