diff --git a/.github/workflows/nightly-test-nvidia.yml b/.github/workflows/nightly-test-nvidia.yml
index 850151f75a3b..68f533c8489a 100644
--- a/.github/workflows/nightly-test-nvidia.yml
+++ b/.github/workflows/nightly-test-nvidia.yml
@@ -73,6 +73,84 @@ jobs:
           cd test
           python3 run_suite_nightly.py --suite nightly-8-gpu-h200 --continue-on-error
+
+      - name: Run Qwen3-235B nightly performance test
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "8-gpu-h200"
+        run: |
+          rm -rf test/performance_profiles_qwen3_235b/
+          cd test
+          python3 nightly/test_qwen3_235b_perf.py
+
+      - name: Publish Qwen3-235B traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_qwen3_235b
+
+      - name: Run Kimi-K2-Thinking nightly performance test
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "8-gpu-h200"
+        run: |
+          rm -rf test/performance_profiles_kimi_k2_thinking/
+          cd test
+          python3 nightly/test_kimi_k2_thinking_perf.py
+
+      - name: Publish Kimi-K2-Thinking traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_kimi_k2_thinking
+
+      - name: Run GLM-4.6 nightly performance test
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "8-gpu-h200"
+        run: |
+          rm -rf test/performance_profiles_glm_4_6/
+          cd test
+          python3 nightly/test_glm_4_6_perf.py
+
+      - name: Publish GLM-4.6 traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_glm_4_6
+
+      # MiniMax-M2 test temporarily disabled due to compatibility issues
+      # See MINIMAX_M2_ISSUES.md for details
+      # - name: Run MiniMax-M2 nightly performance test
+      #   timeout-minutes: 180
+      #   env:
+      #     TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+      #     PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+      #     GPU_CONFIG: "8-gpu-h200"
+      #   run: |
+      #     rm -rf test/performance_profiles_minimax_m2/
+      #     cd test
+      #     python3 nightly/test_minimax_m2_perf.py
+
+      # - name: Publish MiniMax-M2 traces to storage repo
+      #   env:
+      #     GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+      #     GITHUB_RUN_ID: ${{ github.run_id }}
+      #     GITHUB_RUN_NUMBER: ${{ github.run_number }}
+      #   run: |
+      #     python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_minimax_m2
 
   # General tests - 8 GPU H20
   nightly-test-general-8-gpu-h20:
     if: github.repository == 'sgl-project/sglang'
@@ -325,6 +403,84 @@ jobs:
         run: |
           python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_deepseek_v32
+
+      - name: Run Kimi-K2-Thinking nightly performance test
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "8-gpu-b200"
+        run: |
+          rm -rf test/performance_profiles_kimi_k2_thinking/
+          cd test
+          IS_BLACKWELL=1 python3 nightly/test_kimi_k2_thinking_perf.py
+
+      - name: Publish Kimi-K2-Thinking traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_kimi_k2_thinking
+
+      - name: Run Qwen3-235B nightly performance test
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "8-gpu-b200"
+        run: |
+          rm -rf test/performance_profiles_qwen3_235b/
+          cd test
+          IS_BLACKWELL=1 python3 nightly/test_qwen3_235b_perf.py
+
+      - name: Publish Qwen3-235B traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_qwen3_235b
+
+      - name: Run GLM-4.6 nightly performance test
+        timeout-minutes: 180
+        env:
+          TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+          PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+          GPU_CONFIG: "8-gpu-b200"
+        run: |
+          rm -rf test/performance_profiles_glm_4_6/
+          cd test
+          IS_BLACKWELL=1 python3 nightly/test_glm_4_6_perf.py
+
+      - name: Publish GLM-4.6 traces to storage repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+        run: |
+          python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_glm_4_6
+
+      # MiniMax-M2 test temporarily disabled due to compatibility issues
+      # See MINIMAX_M2_ISSUES.md for details
+      # - name: Run MiniMax-M2 nightly performance test
+      #   timeout-minutes: 180
+      #   env:
+      #     TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
+      #     PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
+      #     GPU_CONFIG: "8-gpu-b200"
+      #   run: |
+      #     rm -rf test/performance_profiles_minimax_m2/
+      #     cd test
+      #     IS_BLACKWELL=1 python3 nightly/test_minimax_m2_perf.py
+
+      # - name: Publish MiniMax-M2 traces to storage repo
+      #   env:
+      #     GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
+      #     GITHUB_RUN_ID: ${{ github.run_id }}
+      #     GITHUB_RUN_NUMBER: ${{ github.run_number }}
+      #   run: |
+      #     python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_minimax_m2
 
   # Final check job
   check-all-jobs:
     if: github.repository == 'sgl-project/sglang' && always()
diff --git a/test/nightly/test_glm_4_6_perf.py b/test/nightly/test_glm_4_6_perf.py
new file mode 100644
index 000000000000..a08df730a2f8
--- /dev/null
+++ b/test/nightly/test_glm_4_6_perf.py
@@ -0,0 +1,49 @@
+import unittest
+
+from nightly_utils import NightlyBenchmarkRunner
+
+from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env
+
+GLM_4_6_MODEL_PATH = "zai-org/GLM-4.6"
+PROFILE_DIR = "performance_profiles_glm_4_6"
+
+
+class TestNightlyGLM46Performance(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = GLM_4_6_MODEL_PATH
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.batch_sizes = [1, 1, 8, 16, 64]
+        cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
+        cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))
+
+        # GLM-4.6 is a 357B MoE model
+        cls.other_args = [
+            "--tp",
+            "8",
+            "--trust-remote-code",
+        ]
+
+        cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
+        cls.runner.setup_profile_directory()
+
+    def test_bench_one_batch(self):
+        results, success = self.runner.run_benchmark_for_model(
+            model_path=self.model,
+            batch_sizes=self.batch_sizes,
+            input_lens=self.input_lens,
+            output_lens=self.output_lens,
+            other_args=self.other_args,
+        )
+
+        self.runner.add_report(results)
+        self.runner.write_final_report()
+
+        if not success:
+            raise AssertionError(
+                f"Benchmark failed for {self.model}. Check the logs for details."
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/nightly/test_kimi_k2_thinking_perf.py b/test/nightly/test_kimi_k2_thinking_perf.py
new file mode 100644
index 000000000000..aee86209549f
--- /dev/null
+++ b/test/nightly/test_kimi_k2_thinking_perf.py
@@ -0,0 +1,54 @@
+import unittest
+
+from nightly_utils import NightlyBenchmarkRunner
+
+from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env
+
+KIMI_K2_THINKING_MODEL_PATH = "moonshotai/Kimi-K2-Thinking"
+PROFILE_DIR = "performance_profiles_kimi_k2_thinking"
+
+
+class TestNightlyKimiK2ThinkingPerformance(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = KIMI_K2_THINKING_MODEL_PATH
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.batch_sizes = [1, 1, 8, 16, 64]
+        cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
+        cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))
+
+        # Kimi-K2-Thinking requires specific launch arguments
+        cls.other_args = [
+            "--tp",
+            "8",
+            "--trust-remote-code",
+            "--tool-call-parser",
+            "kimi_k2",
+            "--reasoning-parser",
+            "kimi_k2",
+        ]
+
+        cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
+        cls.runner.setup_profile_directory()
+
+    def test_bench_one_batch(self):
+        results, success = self.runner.run_benchmark_for_model(
+            model_path=self.model,
+            batch_sizes=self.batch_sizes,
+            input_lens=self.input_lens,
+            output_lens=self.output_lens,
+            other_args=self.other_args,
+            extra_bench_args=["--trust-remote-code"],
+        )
+
+        self.runner.add_report(results)
+        self.runner.write_final_report()
+
+        if not success:
+            raise AssertionError(
+                f"Benchmark failed for {self.model}. Check the logs for details."
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/nightly/test_minimax_m2_perf.py b/test/nightly/test_minimax_m2_perf.py
new file mode 100644
index 000000000000..4ce770d95e97
--- /dev/null
+++ b/test/nightly/test_minimax_m2_perf.py
@@ -0,0 +1,49 @@
+import unittest
+
+from nightly_utils import NightlyBenchmarkRunner
+
+from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env
+
+MINIMAX_M2_MODEL_PATH = "MiniMaxAI/MiniMax-M2"
+PROFILE_DIR = "performance_profiles_minimax_m2"
+
+
+class TestNightlyMiniMaxM2Performance(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = MINIMAX_M2_MODEL_PATH
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.batch_sizes = [1, 1, 8, 16, 64]
+        cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
+        cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))
+
+        # MiniMax-M2 is a 230B MoE model with 10B active params
+        cls.other_args = [
+            "--tp",
+            "8",
+            "--trust-remote-code",
+        ]
+
+        cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
+        cls.runner.setup_profile_directory()
+
+    def test_bench_one_batch(self):
+        results, success = self.runner.run_benchmark_for_model(
+            model_path=self.model,
+            batch_sizes=self.batch_sizes,
+            input_lens=self.input_lens,
+            output_lens=self.output_lens,
+            other_args=self.other_args,
+        )
+
+        self.runner.add_report(results)
+        self.runner.write_final_report()
+
+        if not success:
+            raise AssertionError(
+                f"Benchmark failed for {self.model}. Check the logs for details."
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/nightly/test_qwen3_235b_perf.py b/test/nightly/test_qwen3_235b_perf.py
new file mode 100644
index 000000000000..7988bbe7d5e4
--- /dev/null
+++ b/test/nightly/test_qwen3_235b_perf.py
@@ -0,0 +1,49 @@
+import unittest
+
+from nightly_utils import NightlyBenchmarkRunner
+
+from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env
+
+QWEN3_235B_MODEL_PATH = "Qwen/Qwen3-235B-A22B-Instruct-2507"
+PROFILE_DIR = "performance_profiles_qwen3_235b"
+
+
+class TestNightlyQwen3235BPerformance(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.model = QWEN3_235B_MODEL_PATH
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        cls.batch_sizes = [1, 1, 8, 16, 64]
+        cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
+        cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))
+
+        # Qwen3-235B requires TP=8 for 8 GPUs
+        cls.other_args = [
+            "--tp",
+            "8",
+            "--trust-remote-code",
+        ]
+
+        cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
+        cls.runner.setup_profile_directory()
+
+    def test_bench_one_batch(self):
+        results, success = self.runner.run_benchmark_for_model(
+            model_path=self.model,
+            batch_sizes=self.batch_sizes,
+            input_lens=self.input_lens,
+            output_lens=self.output_lens,
+            other_args=self.other_args,
+        )
+
+        self.runner.add_report(results)
+        self.runner.write_final_report()
+
+        if not success:
+            raise AssertionError(
+                f"Benchmark failed for {self.model}. Check the logs for details."
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()