156 changes: 156 additions & 0 deletions .github/workflows/nightly-test-nvidia.yml
@@ -73,6 +73,84 @@ jobs:
cd test
python3 run_suite_nightly.py --suite nightly-8-gpu-h200 --continue-on-error

- name: Run Qwen3-235B nightly performance test
timeout-minutes: 180
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "8-gpu-h200"
run: |
rm -rf test/performance_profiles_qwen3_235b/
cd test
python3 nightly/test_qwen3_235b_perf.py

- name: Publish Qwen3-235B traces to storage repo
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_qwen3_235b

- name: Run Kimi-K2-Thinking nightly performance test
timeout-minutes: 180
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "8-gpu-h200"
run: |
rm -rf test/performance_profiles_kimi_k2_thinking/
cd test
python3 nightly/test_kimi_k2_thinking_perf.py

- name: Publish Kimi-K2-Thinking traces to storage repo
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_kimi_k2_thinking

- name: Run GLM-4.6 nightly performance test
timeout-minutes: 180
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "8-gpu-h200"
run: |
rm -rf test/performance_profiles_glm_4_6/
cd test
python3 nightly/test_glm_4_6_perf.py

- name: Publish GLM-4.6 traces to storage repo
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_glm_4_6

# MiniMax-M2 test temporarily disabled due to compatibility issues
# See MINIMAX_M2_ISSUES.md for details
# - name: Run MiniMax-M2 nightly performance test
# timeout-minutes: 180
# env:
# TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
# PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
# GPU_CONFIG: "8-gpu-h200"
# run: |
# rm -rf test/performance_profiles_minimax_m2/
# cd test
# python3 nightly/test_minimax_m2_perf.py

# - name: Publish MiniMax-M2 traces to storage repo
# env:
# GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
# GITHUB_RUN_ID: ${{ github.run_id }}
# GITHUB_RUN_NUMBER: ${{ github.run_number }}
# run: |
# python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_minimax_m2

# General tests - 8 GPU H20
nightly-test-general-8-gpu-h20:
if: github.repository == 'sgl-project/sglang'
@@ -325,6 +403,84 @@ jobs:
run: |
python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_deepseek_v32

- name: Run Kimi-K2-Thinking nightly performance test
timeout-minutes: 180
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "8-gpu-b200"
run: |
rm -rf test/performance_profiles_kimi_k2_thinking/
cd test
IS_BLACKWELL=1 python3 nightly/test_kimi_k2_thinking_perf.py

- name: Publish Kimi-K2-Thinking traces to storage repo
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_kimi_k2_thinking

- name: Run Qwen3-235B nightly performance test
timeout-minutes: 180
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "8-gpu-b200"
run: |
rm -rf test/performance_profiles_qwen3_235b/
cd test
IS_BLACKWELL=1 python3 nightly/test_qwen3_235b_perf.py

- name: Publish Qwen3-235B traces to storage repo
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_qwen3_235b

- name: Run GLM-4.6 nightly performance test
timeout-minutes: 180
env:
TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
GPU_CONFIG: "8-gpu-b200"
run: |
rm -rf test/performance_profiles_glm_4_6/
cd test
IS_BLACKWELL=1 python3 nightly/test_glm_4_6_perf.py

- name: Publish GLM-4.6 traces to storage repo
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
run: |
python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_glm_4_6

# MiniMax-M2 test temporarily disabled due to compatibility issues
# See MINIMAX_M2_ISSUES.md for details
# - name: Run MiniMax-M2 nightly performance test
# timeout-minutes: 180
# env:
# TRACE_BASE_URL: https://raw.githubusercontent.com/sglang-bot/sglang-ci-data/main/traces/${{ github.run_id }}
# PERFETTO_RELAY_URL: ${{ vars.PERFETTO_RELAY_URL }}
# GPU_CONFIG: "8-gpu-b200"
# run: |
# rm -rf test/performance_profiles_minimax_m2/
# cd test
# IS_BLACKWELL=1 python3 nightly/test_minimax_m2_perf.py

# - name: Publish MiniMax-M2 traces to storage repo
# env:
# GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
# GITHUB_RUN_ID: ${{ github.run_id }}
# GITHUB_RUN_NUMBER: ${{ github.run_number }}
# run: |
# python3 scripts/ci/publish_traces.py --traces-dir test/performance_profiles_minimax_m2

# Final check job
check-all-jobs:
if: github.repository == 'sgl-project/sglang' && always()
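Both publish steps above invoke scripts/ci/publish_traces.py, which is not part of this diff. Inferred from the env vars it receives and from the TRACE_BASE_URL layout (traces/<run_id>/ inside sglang-bot/sglang-ci-data), a minimal sketch of what such a script plausibly does follows; the repo URL construction, commit message, and git flow are assumptions, not the actual implementation.

# Hypothetical sketch of scripts/ci/publish_traces.py -- not the actual script.
import argparse
import os
import shutil
import subprocess
import tempfile


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--traces-dir", required=True)
    args = parser.parse_args()

    token = os.environ["GITHUB_TOKEN"]
    run_id = os.environ["GITHUB_RUN_ID"]

    with tempfile.TemporaryDirectory() as tmp:
        # Clone the storage repo; the URL is an assumption based on TRACE_BASE_URL.
        repo_url = f"https://x-access-token:{token}@github.com/sglang-bot/sglang-ci-data.git"
        subprocess.run(["git", "clone", "--depth", "1", repo_url, tmp], check=True)

        # Copy the traces under traces/<run_id>/ so TRACE_BASE_URL resolves to them.
        dest = os.path.join(tmp, "traces", run_id)
        shutil.copytree(args.traces_dir, dest, dirs_exist_ok=True)

        # CI would also need git user.name/user.email configured before committing.
        subprocess.run(["git", "-C", tmp, "add", "traces"], check=True)
        subprocess.run(
            ["git", "-C", tmp, "commit", "-m", f"Add traces for run {run_id}"],
            check=True,
        )
        subprocess.run(["git", "-C", tmp, "push"], check=True)


if __name__ == "__main__":
    main()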
49 changes: 49 additions & 0 deletions test/nightly/test_glm_4_6_perf.py
@@ -0,0 +1,49 @@
import unittest

from nightly_utils import NightlyBenchmarkRunner

from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env

GLM_4_6_MODEL_PATH = "zai-org/GLM-4.6"
PROFILE_DIR = "performance_profiles_glm_4_6"


class TestNightlyGLM46Performance(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = GLM_4_6_MODEL_PATH
cls.base_url = DEFAULT_URL_FOR_TEST
cls.batch_sizes = [1, 1, 8, 16, 64]  # batch size 1 runs twice; the first pass presumably serves as a warmup
cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))

# GLM-4.6 is a 357B MoE model
cls.other_args = [
"--tp",
"8",
"--trust-remote-code",
]

cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
cls.runner.setup_profile_directory()

def test_bench_one_batch(self):
results, success = self.runner.run_benchmark_for_model(
model_path=self.model,
batch_sizes=self.batch_sizes,
input_lens=self.input_lens,
output_lens=self.output_lens,
other_args=self.other_args,
)

self.runner.add_report(results)
self.runner.write_final_report()

if not success:
raise AssertionError(
f"Benchmark failed for {self.model}. Check the logs for details."
)


if __name__ == "__main__":
unittest.main()
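All four tests pull their sweep lengths through _parse_int_list_env from sglang.test.test_utils, whose implementation this diff does not show. Judging from the call sites, it reads a comma-separated list of integers from an environment variable with a string fallback; a plausible sketch, with the exact signature and error handling assumed:

import os
from typing import List


def _parse_int_list_env(name: str, default: str) -> List[int]:
    # Read a comma-separated int list from the environment,
    # e.g. NIGHTLY_INPUT_LENS="2048,4096" -> [2048, 4096].
    raw = os.environ.get(name, default)
    return [int(item) for item in raw.split(",") if item.strip()]

Under that behavior, exporting NIGHTLY_INPUT_LENS=2048,4096 would make each test sweep two prompt lengths instead of the single default of 4096.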
54 changes: 54 additions & 0 deletions test/nightly/test_kimi_k2_thinking_perf.py
@@ -0,0 +1,54 @@
import unittest

from nightly_utils import NightlyBenchmarkRunner

from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env

KIMI_K2_THINKING_MODEL_PATH = "moonshotai/Kimi-K2-Thinking"
PROFILE_DIR = "performance_profiles_kimi_k2_thinking"


class TestNightlyKimiK2ThinkingPerformance(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = KIMI_K2_THINKING_MODEL_PATH
cls.base_url = DEFAULT_URL_FOR_TEST
cls.batch_sizes = [1, 1, 8, 16, 64]  # batch size 1 runs twice; the first pass presumably serves as a warmup
cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))

# Kimi-K2-Thinking requires specific launch arguments
cls.other_args = [
"--tp",
"8",
"--trust-remote-code",
"--tool-call-parser",
"kimi_k2",
"--reasoning-parser",
"kimi_k2",
]

cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
cls.runner.setup_profile_directory()

def test_bench_one_batch(self):
results, success = self.runner.run_benchmark_for_model(
model_path=self.model,
batch_sizes=self.batch_sizes,
input_lens=self.input_lens,
output_lens=self.output_lens,
other_args=self.other_args,
extra_bench_args=["--trust-remote-code"],
)

self.runner.add_report(results)
self.runner.write_final_report()

if not success:
raise AssertionError(
f"Benchmark failed for {self.model}. Check the logs for details."
)


if __name__ == "__main__":
unittest.main()
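run_benchmark_for_model is provided by NightlyBenchmarkRunner in nightly_utils, also outside this diff. A reasonable mental model, assuming the runner shells out to sglang's bench_one_batch once per (batch size, input length, output length) combination and aggregates the outcomes, is sketched below; the flag names, result shape, and error handling are assumptions.

# Assumed shape of NightlyBenchmarkRunner.run_benchmark_for_model -- the real
# implementation lives in test/nightly/nightly_utils.py and is not in this diff.
import itertools
import subprocess
from typing import List, Sequence, Tuple


def run_benchmark_for_model(
    model_path: str,
    batch_sizes: Sequence[int],
    input_lens: Sequence[int],
    output_lens: Sequence[int],
    other_args: Sequence[str],
    extra_bench_args: Sequence[str] = (),
) -> Tuple[List[dict], bool]:
    results, success = [], True
    for bs, in_len, out_len in itertools.product(batch_sizes, input_lens, output_lens):
        cmd = [
            "python3", "-m", "sglang.bench_one_batch",
            "--model-path", model_path,
            "--batch-size", str(bs),
            "--input-len", str(in_len),
            "--output-len", str(out_len),
            *other_args,
            *extra_bench_args,
        ]
        proc = subprocess.run(cmd, capture_output=True, text=True)
        if proc.returncode != 0:
            success = False  # keep sweeping; the test raises after reporting
        results.append(
            {"batch_size": bs, "input_len": in_len, "output_len": out_len,
             "returncode": proc.returncode}
        )
    return results, success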
49 changes: 49 additions & 0 deletions test/nightly/test_minimax_m2_perf.py
@@ -0,0 +1,49 @@
import unittest

from nightly_utils import NightlyBenchmarkRunner

from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env

MINIMAX_M2_MODEL_PATH = "MiniMaxAI/MiniMax-M2"
PROFILE_DIR = "performance_profiles_minimax_m2"


class TestNightlyMiniMaxM2Performance(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = MINIMAX_M2_MODEL_PATH
cls.base_url = DEFAULT_URL_FOR_TEST
cls.batch_sizes = [1, 1, 8, 16, 64]  # batch size 1 runs twice; the first pass presumably serves as a warmup
cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))

# MiniMax-M2 is a 230B MoE model with 10B active params
cls.other_args = [
"--tp",
"8",
"--trust-remote-code",
]

cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
cls.runner.setup_profile_directory()

def test_bench_one_batch(self):
results, success = self.runner.run_benchmark_for_model(
model_path=self.model,
batch_sizes=self.batch_sizes,
input_lens=self.input_lens,
output_lens=self.output_lens,
other_args=self.other_args,
)

self.runner.add_report(results)
self.runner.write_final_report()

if not success:
raise AssertionError(
f"Benchmark failed for {self.model}. Check the logs for details."
)


if __name__ == "__main__":
unittest.main()
49 changes: 49 additions & 0 deletions test/nightly/test_qwen3_235b_perf.py
@@ -0,0 +1,49 @@
import unittest

from nightly_utils import NightlyBenchmarkRunner

from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env

QWEN3_235B_MODEL_PATH = "Qwen/Qwen3-235B-A22B-Instruct-2507"
PROFILE_DIR = "performance_profiles_qwen3_235b"


class TestNightlyQwen3235BPerformance(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.model = QWEN3_235B_MODEL_PATH
cls.base_url = DEFAULT_URL_FOR_TEST
cls.batch_sizes = [1, 1, 8, 16, 64]  # batch size 1 runs twice; the first pass presumably serves as a warmup
cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))

# Run Qwen3-235B with TP=8 so it shards across all 8 GPUs
cls.other_args = [
"--tp",
"8",
"--trust-remote-code",
]

cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
cls.runner.setup_profile_directory()

def test_bench_one_batch(self):
results, success = self.runner.run_benchmark_for_model(
model_path=self.model,
batch_sizes=self.batch_sizes,
input_lens=self.input_lens,
output_lens=self.output_lens,
other_args=self.other_args,
)

self.runner.add_report(results)
self.runner.write_final_report()

if not success:
raise AssertionError(
f"Benchmark failed for {self.model}. Check the logs for details."
)


if __name__ == "__main__":
unittest.main()
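To reproduce one of these sweeps outside CI, the same environment variables the workflow sets can be supplied locally. A hypothetical driver mirroring the workflow's run: steps (the env var names come from the tests above; the values here are examples):

import os
import subprocess

env = dict(
    os.environ,
    NIGHTLY_INPUT_LENS="2048,4096",  # sweep two prompt lengths
    NIGHTLY_OUTPUT_LENS="256",
)
subprocess.run(
    ["python3", "nightly/test_qwen3_235b_perf.py"],
    cwd="test",  # the workflow cd's into test/ before running
    env=env,
    check=True,
)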