CI: run the per-commit-8-gpu suite as a unit-test job.

* pr-test.yml: replace `large-scale-test-8-gpu` with `unit-test-backend-8-gpu`,
  which waits only on the frontend, 1-GPU and 2-GPU unit-test jobs, and install
  dependencies via scripts/ci_install_dependency.sh in the 8-GPU and
  disaggregation jobs.
* run_suite.py: temporarily comment out the DeepEP tests in "per-commit-8-gpu".

diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index 2997ce95891..c08e3e25cc7 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -89,6 +89,25 @@ jobs:
           cd test/srt
           python3 run_suite.py --suite per-commit-2-gpu
 
+  unit-test-backend-8-gpu:
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+        github.event.pull_request.draft == false
+    needs: [unit-test-frontend, unit-test-backend-1-gpu, unit-test-backend-2-gpu]
+    runs-on: 8-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite per-commit-8-gpu
+
   performance-test-1-gpu-part-1:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
         github.event.pull_request.draft == false
@@ -262,7 +281,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          bash scripts/ci_install_dependency_8_gpu.sh
+          bash scripts/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 10
@@ -270,35 +289,12 @@ jobs:
           cd test/srt
           python3 -m unittest test_disaggregation.TestDisaggregationMooncake.test_gsm8k
 
-  large-scale-test-8-gpu:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
-    needs: [
-      unit-test-frontend, unit-test-backend-1-gpu, unit-test-backend-2-gpu,
-      performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
-      accuracy-test-1-gpu, accuracy-test-2-gpu,
-    ]
-    runs-on: 8-gpu-runner
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Install dependencies
-        run: |
-          bash scripts/ci_install_dependency_8_gpu.sh
-
-      - name: Run test
-        timeout-minutes: 25
-        run: |
-          cd test/srt
-          python3 run_suite.py --suite per-commit-8-gpu
-
   finish:
     if: always()
     needs: [
-      unit-test-frontend, unit-test-backend-1-gpu, unit-test-backend-2-gpu,
+      unit-test-frontend, unit-test-backend-1-gpu, unit-test-backend-2-gpu, unit-test-backend-8-gpu,
       performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
-      accuracy-test-1-gpu, accuracy-test-2-gpu, large-scale-test-8-gpu,
+      accuracy-test-1-gpu, accuracy-test-2-gpu,
     ]
     runs-on: ubuntu-latest
     steps:
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index 0feb44778b2..50cc0d9aa3b 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -99,9 +99,11 @@ class TestFile:
         TestFile("test_verl_engine.py", 64),
     ],
     "per-commit-8-gpu": [
-        TestFile("test_deepep_intranode.py", 50),
-        TestFile("test_deepep_low_latency.py", 50),
-        TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
+        # Disabled deepep tests temporarily because they take too much time.
+        # TODO: re-enable them after reducing the test time with compilation cache and smaller models.
+        # TestFile("test_deepep_intranode.py", 50),
+        # TestFile("test_deepep_low_latency.py", 50),
+        # TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
         TestFile("test_local_attn.py", 250),
         TestFile("test_full_deepseek_v3.py", 250),
         TestFile("test_pp_single_node.py", 150),