diff --git a/.github/workflows/nightly-test-amd.yml b/.github/workflows/nightly-test-amd.yml index 1adb2ab3f00f..bfde6130fd72 100644 --- a/.github/workflows/nightly-test-amd.yml +++ b/.github/workflows/nightly-test-amd.yml @@ -34,6 +34,7 @@ on: - 'nightly-8-gpu-kimi-k2' # MI35x jobs - 'nightly-test-1-gpu-mi35x' + - 'nightly-8-gpu-mi35x-kimi-k2' - 'nightly-accuracy-8-gpu-mi35x' - 'nightly-8-gpu-mi35x-grok1-int4' - 'nightly-8-gpu-mi35x-grok2' @@ -582,13 +583,13 @@ jobs: bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate - name: Accuracy Test MI35x (8-GPU Grok1-INT4) - timeout-minutes: 60 + timeout-minutes: 90 run: | > github_summary.md # Clear summary file bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ -e RCCL_MSCCL_ENABLE=0 \ -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ - python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 3600 || TEST_EXIT_CODE=$? + python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-grok1-int4 --nightly --timeout-per-file 5400 || TEST_EXIT_CODE=$? 
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true exit ${TEST_EXIT_CODE:-0} @@ -793,6 +794,39 @@ jobs: echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true exit ${TEST_EXIT_CODE:-0} + # MI35x 8-GPU Kimi-K2 (Accuracy) + nightly-8-gpu-mi35x-kimi-k2: + if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-8-gpu-mi35x-kimi-k2') + runs-on: linux-mi35x-gpu-8 + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: ${{ inputs.ref || github.ref }} + + - name: Setup docker + run: | + touch github_summary.md + bash scripts/ci/amd/amd_ci_start_container.sh + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Install dependencies + run: | + bash scripts/ci/amd/amd_ci_install_dependency.sh + # Install tabulate for run_suite.py (missing in MI35x container) + bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate + + - name: Accuracy Test MI35x (8-GPU Kimi-K2) + timeout-minutes: 180 + run: | + > github_summary.md # Clear summary file + bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \ + -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \ + python3 run_suite.py --hw amd --suite nightly-amd-accuracy-8-gpu-mi35x-kimi-k2 --nightly --timeout-per-file 7200 || TEST_EXIT_CODE=$? 
+ echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true + exit ${TEST_EXIT_CODE:-0} + # MI35x 8-GPU DeepSeek-V3.2 Performance Test (MTP) nightly-perf-8-gpu-mi35x-deepseek-v32-mtp: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-perf-8-gpu-mi35x-deepseek-v32-mtp') @@ -834,10 +868,10 @@ jobs: # MI30x Accuracy Tests - nightly-accuracy-2-gpu - nightly-accuracy-2-gpu-vlm - # MI30x Performance Tests - - nightly-perf-2-gpu-text - - nightly-perf-2-gpu-vlm - nightly-accuracy-8-gpu + # MI30x Performance Tests - excluded from check (perf failures don't block CI) + # - nightly-perf-2-gpu-text + # - nightly-perf-2-gpu-vlm # MI30x Combined Accuracy + Performance Tests - nightly-8-gpu-grok1-int4 - nightly-8-gpu-grok2 @@ -853,6 +887,7 @@ jobs: - nightly-8-gpu-mi35x-deepseek-r1-mxfp4 - nightly-accuracy-8-gpu-mi35x-deepseek-v32 - nightly-accuracy-8-gpu-mi35x-deepseek-v32-mtp + - nightly-8-gpu-mi35x-kimi-k2 # MI35x perf jobs excluded from check - perf failures don't block CI # - nightly-perf-8-gpu-mi35x-deepseek-v32-basic # - nightly-perf-8-gpu-mi35x-deepseek-v32-mtp diff --git a/python/sglang/test/nightly_utils.py b/python/sglang/test/nightly_utils.py index 111eaeb42909..e264c7c21efe 100644 --- a/python/sglang/test/nightly_utils.py +++ b/python/sglang/test/nightly_utils.py @@ -94,6 +94,7 @@ def build_benchmark_command( json_output_file: str, extra_args: Optional[List[str]] = None, server_args: Optional[List[str]] = None, + enable_profile: bool = True, ) -> List[str]: """Build the benchmark command with all required arguments. 
@@ -106,6 +107,7 @@ def build_benchmark_command( json_output_file: Path to JSON output file extra_args: Optional extra arguments to append to command server_args: Optional server launch arguments to record in metrics + enable_profile: Whether to enable profiling (default True for NVIDIA) Returns: List of command arguments ready for subprocess.run() @@ -125,15 +127,22 @@ def build_benchmark_command( "--output-len", *[str(x) for x in output_lens], "--show-report", - "--profile", - "--profile-by-stage", - "--profile-output-dir", - profile_path_prefix, f"--pydantic-result-filename={json_output_file}", "--no-append-to-github-summary", "--trust-remote-code", ] + # Add profiling flags only if enabled (disabled for AMD tests) + if enable_profile and profile_path_prefix: + command.extend( + [ + "--profile", + "--profile-by-stage", + "--profile-output-dir", + profile_path_prefix, + ] + ) + if extra_args: command.extend(extra_args) @@ -218,6 +227,7 @@ def run_benchmark_for_model( other_args: Optional[List[str]] = None, variant: str = "", extra_bench_args: Optional[List[str]] = None, + enable_profile: bool = True, ) -> Tuple[List[BenchmarkResult], bool, Optional[float]]: """Run a complete benchmark for a single model with server management. 
@@ -236,6 +246,7 @@ def run_benchmark_for_model( other_args: Arguments to pass to server launch variant: Optional variant suffix (e.g., "basic", "mtp") extra_bench_args: Extra arguments for the benchmark command + enable_profile: Whether to enable profiling (default True for NVIDIA) Returns: Tuple of (list of BenchmarkResult objects, success_bool, avg_spec_accept_length or None) @@ -273,6 +284,7 @@ def run_benchmark_for_model( json_output_file, extra_args=bench_args, server_args=other_args, + enable_profile=enable_profile, ) result, cmd_success = self.run_benchmark_command(command, model_description) diff --git a/test/registered/amd/accuracy/test_deepseek_r1_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_deepseek_r1_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_deepseek_r1_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_deepseek_r1_eval_amd.py diff --git a/test/registered/amd/accuracy/test_deepseek_v31_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_deepseek_v31_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_deepseek_v31_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_deepseek_v31_eval_amd.py diff --git a/test/registered/amd/accuracy/test_deepseek_v32_dp_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_deepseek_v32_dp_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_deepseek_v32_dp_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_deepseek_v32_dp_eval_amd.py diff --git a/test/registered/amd/accuracy/test_deepseek_v32_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_deepseek_v32_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_deepseek_v32_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_deepseek_v32_eval_amd.py diff --git a/test/registered/amd/accuracy/test_deepseek_v32_mtp_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_deepseek_v32_mtp_eval_amd.py 
similarity index 100% rename from test/registered/amd/accuracy/test_deepseek_v32_mtp_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_deepseek_v32_mtp_eval_amd.py diff --git a/test/registered/amd/accuracy/test_deepseek_v32_tc_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_deepseek_v32_tc_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_deepseek_v32_tc_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_deepseek_v32_tc_eval_amd.py diff --git a/test/registered/amd/accuracy/test_gpt_oss_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_gpt_oss_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_gpt_oss_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_gpt_oss_eval_amd.py diff --git a/test/registered/amd/accuracy/test_grok1_fp8_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_grok1_fp8_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_grok1_fp8_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_grok1_fp8_eval_amd.py diff --git a/test/registered/amd/accuracy/test_grok1_int4_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_grok1_int4_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_grok1_int4_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_grok1_int4_eval_amd.py diff --git a/test/registered/amd/accuracy/test_grok2_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_grok2_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_grok2_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_grok2_eval_amd.py diff --git a/test/registered/amd/accuracy/test_grok_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_grok_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_grok_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_grok_eval_amd.py diff --git a/test/registered/amd/accuracy/test_gsm8k_eval_amd.py 
b/test/registered/amd/accuracy/mi30x/test_gsm8k_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_gsm8k_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_gsm8k_eval_amd.py diff --git a/test/registered/amd/accuracy/test_kimi_k2_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_kimi_k2_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_kimi_k2_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_kimi_k2_eval_amd.py diff --git a/test/registered/amd/accuracy/test_vlms_mmmu_eval_amd.py b/test/registered/amd/accuracy/mi30x/test_vlms_mmmu_eval_amd.py similarity index 100% rename from test/registered/amd/accuracy/test_vlms_mmmu_eval_amd.py rename to test/registered/amd/accuracy/mi30x/test_vlms_mmmu_eval_amd.py diff --git a/test/registered/amd/accuracy/mi35x/test_gpt_oss_eval_mi35x.py b/test/registered/amd/accuracy/mi35x/test_gpt_oss_eval_mi35x.py index 4c2f8861ef3a..548af0304a7b 100644 --- a/test/registered/amd/accuracy/mi35x/test_gpt_oss_eval_mi35x.py +++ b/test/registered/amd/accuracy/mi35x/test_gpt_oss_eval_mi35x.py @@ -75,7 +75,9 @@ def __post_init__(self): "triton", "--trust-remote-code", ], - env_vars={"SGLANG_USE_AITER": "1"}, + env_vars={ + "SGLANG_USE_AITER": "0" + }, # Disabled due to SWA eviction bug with aiter (#17220) ), ModelConfig( model_path="openai/gpt-oss-120b", @@ -93,7 +95,9 @@ def __post_init__(self): "triton", "--trust-remote-code", ], - env_vars={"SGLANG_USE_AITER": "1"}, + env_vars={ + "SGLANG_USE_AITER": "0" + }, # Disabled due to SWA eviction bug with aiter (#17220) ), ] diff --git a/test/registered/amd/accuracy/mi35x/test_grok1_int4_eval_mi35x.py b/test/registered/amd/accuracy/mi35x/test_grok1_int4_eval_mi35x.py index afe5dd89c765..872b76287b19 100644 --- a/test/registered/amd/accuracy/mi35x/test_grok1_int4_eval_mi35x.py +++ b/test/registered/amd/accuracy/mi35x/test_grok1_int4_eval_mi35x.py @@ -23,9 +23,9 @@ ) from sglang.utils import download_and_cache_file, 
read_jsonl -# Register for AMD CI - GROK1-INT4 accuracy tests on MI35x (~25 min) +# Register for AMD CI - GROK1-INT4 accuracy tests on MI35x (~70 min) register_amd_ci( - est_time=1500, suite="nightly-amd-accuracy-8-gpu-mi35x-grok1-int4", nightly=True + est_time=4200, suite="nightly-amd-accuracy-8-gpu-mi35x-grok1-int4", nightly=True ) INVALID = -9999999 diff --git a/test/registered/amd/accuracy/mi35x/test_grok2_eval_mi35x.py b/test/registered/amd/accuracy/mi35x/test_grok2_eval_mi35x.py index 33f826f7cbb9..a639bf4b7724 100644 --- a/test/registered/amd/accuracy/mi35x/test_grok2_eval_mi35x.py +++ b/test/registered/amd/accuracy/mi35x/test_grok2_eval_mi35x.py @@ -105,7 +105,7 @@ class TestGrok2EvalMI35x(unittest.TestCase): def setUpClass(cls): cls.base_url = DEFAULT_URL_FOR_TEST cls.num_questions = int(os.environ.get("GSM8K_NUM_QUESTIONS", "200")) - cls.accuracy_threshold = 0.915 + cls.accuracy_threshold = 0.90 def test_grok2_accuracy(self): """Test Grok-2 with GSM8K completion benchmark.""" diff --git a/test/registered/amd/accuracy/mi35x/test_kimi_k2_eval_mi35x.py b/test/registered/amd/accuracy/mi35x/test_kimi_k2_eval_mi35x.py new file mode 100644 index 000000000000..53d84014700a --- /dev/null +++ b/test/registered/amd/accuracy/mi35x/test_kimi_k2_eval_mi35x.py @@ -0,0 +1,105 @@ +"""MI35x Kimi-K2 GSM8K Completion Evaluation Test (8-GPU) + +Tests moonshotai/Kimi-K2-Instruct-0905 with GSM8K few-shot benchmark on MI35x. 
+ +Registry: nightly-amd-accuracy-8-gpu-mi35x-kimi-k2 suite +""" + +import os +import unittest +from types import SimpleNamespace + +import requests + +from sglang.srt.utils import kill_process_tree +from sglang.test.ci.ci_register import register_amd_ci +from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k +from sglang.test.test_utils import ( + DEFAULT_URL_FOR_TEST, + CustomTestCase, + is_in_ci, + popen_launch_server, + write_github_step_summary, +) + +# Register for AMD CI - Kimi K2 accuracy test on MI35x (~60 min) +register_amd_ci( + est_time=3600, suite="nightly-amd-accuracy-8-gpu-mi35x-kimi-k2", nightly=True +) + +KIMI_K2_MODEL_PATH = "moonshotai/Kimi-K2-Instruct-0905" +SERVER_LAUNCH_TIMEOUT = 3600 +ACCURACY_THRESHOLD = 0.94 + + +class TestKimiK2EvalMI35x(CustomTestCase): + """Kimi-K2 GSM8K Completion Evaluation Test for AMD MI35x.""" + + @classmethod + def setUpClass(cls): + cls.base_url = DEFAULT_URL_FOR_TEST + + def test_kimi_k2_gsm8k_accuracy(self): + """Test Kimi-K2 with GSM8K few-shot completion benchmark.""" + other_args = [ + "--tp", + "8", + "--decode-attention-backend", + "triton", + "--prefill-attention-backend", + "aiter", + "--trust-remote-code", + "--model-loader-extra-config", + '{"enable_multithread_load": true}', + "--watchdog-timeout", + "1200", + ] + env = os.environ.copy() + env["SGLANG_USE_AITER"] = "1" + env["SGLANG_ROCM_FUSED_DECODE_MLA"] = "0" + + process = popen_launch_server( + KIMI_K2_MODEL_PATH, + self.base_url, + timeout=SERVER_LAUNCH_TIMEOUT, + other_args=other_args, + env=env, + ) + + try: + requests.get(self.base_url + "/flush_cache") + + args = SimpleNamespace( + num_shots=8, + data_path=None, + num_questions=1319, + parallel=1319, + max_new_tokens=512, + host="http://127.0.0.1", + port=int(self.base_url.split(":")[-1]), + ) + metrics = run_eval_few_shot_gsm8k(args) + acc = metrics["accuracy"] + + passed = acc >= ACCURACY_THRESHOLD + status = "✅ PASS" if passed else "❌ FAIL" + print(f" accuracy={acc:.3f} 
threshold={ACCURACY_THRESHOLD} {status}") + + if is_in_ci(): + summary = "### Kimi-K2 Model (MI35x)\n\n" + summary += "| Model | TP | Accuracy | Threshold | Status |\n" + summary += "| ----- | -- | -------- | --------- | ------ |\n" + summary += f"| {KIMI_K2_MODEL_PATH} | 8 | {acc:.3f} | {ACCURACY_THRESHOLD} | {status} |\n" + write_github_step_summary(summary) + + self.assertGreaterEqual( + acc, + ACCURACY_THRESHOLD, + f"Kimi-K2 accuracy {acc:.3f} below threshold {ACCURACY_THRESHOLD}", + ) + finally: + kill_process_tree(process.pid) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/registered/amd/perf/test_deepseek_v31_perf.py b/test/registered/amd/perf/mi30x/test_deepseek_v31_perf.py similarity index 98% rename from test/registered/amd/perf/test_deepseek_v31_perf.py rename to test/registered/amd/perf/mi30x/test_deepseek_v31_perf.py index 5c8c50d991d8..eca18407236a 100644 --- a/test/registered/amd/perf/test_deepseek_v31_perf.py +++ b/test/registered/amd/perf/mi30x/test_deepseek_v31_perf.py @@ -129,6 +129,7 @@ def test_bench_one_batch(self): other_args=variant_config["other_args"], variant=variant_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/test_deepseek_v32_basic_perf_amd.py b/test/registered/amd/perf/mi30x/test_deepseek_v32_basic_perf_amd.py similarity index 98% rename from test/registered/amd/perf/test_deepseek_v32_basic_perf_amd.py rename to test/registered/amd/perf/mi30x/test_deepseek_v32_basic_perf_amd.py index 56d0fc433c56..9b78008f1abd 100644 --- a/test/registered/amd/perf/test_deepseek_v32_basic_perf_amd.py +++ b/test/registered/amd/perf/mi30x/test_deepseek_v32_basic_perf_amd.py @@ -115,6 +115,7 @@ def test_bench_one_batch(self): other_args=self.variant_config["other_args"], variant=self.variant_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # 
Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/test_deepseek_v32_mtp_perf_amd.py b/test/registered/amd/perf/mi30x/test_deepseek_v32_mtp_perf_amd.py similarity index 98% rename from test/registered/amd/perf/test_deepseek_v32_mtp_perf_amd.py rename to test/registered/amd/perf/mi30x/test_deepseek_v32_mtp_perf_amd.py index dc316c77deaa..0dc0c7b523bd 100644 --- a/test/registered/amd/perf/test_deepseek_v32_mtp_perf_amd.py +++ b/test/registered/amd/perf/mi30x/test_deepseek_v32_mtp_perf_amd.py @@ -124,6 +124,7 @@ def test_bench_one_batch(self): other_args=self.variant_config["other_args"], variant=self.variant_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/test_deepseek_v3_perf.py b/test/registered/amd/perf/mi30x/test_deepseek_v3_perf.py similarity index 98% rename from test/registered/amd/perf/test_deepseek_v3_perf.py rename to test/registered/amd/perf/mi30x/test_deepseek_v3_perf.py index 02e009aa952e..6f0cd52a1f46 100644 --- a/test/registered/amd/perf/test_deepseek_v3_perf.py +++ b/test/registered/amd/perf/mi30x/test_deepseek_v3_perf.py @@ -119,6 +119,7 @@ def test_bench_one_batch(self): other_args=variant_config["other_args"], variant=variant_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/test_grok1_fp8_perf.py b/test/registered/amd/perf/mi30x/test_grok1_fp8_perf.py similarity index 98% rename from test/registered/amd/perf/test_grok1_fp8_perf.py rename to test/registered/amd/perf/mi30x/test_grok1_fp8_perf.py index b74cb8e22b45..7e04096eb9ba 100644 --- a/test/registered/amd/perf/test_grok1_fp8_perf.py +++ b/test/registered/amd/perf/mi30x/test_grok1_fp8_perf.py @@ -109,6 +109,7 @@ 
def test_bench_grok1_fp8(self): other_args=self.model_config["other_args"], variant=self.model_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/test_grok1_int4_perf.py b/test/registered/amd/perf/mi30x/test_grok1_int4_perf.py similarity index 98% rename from test/registered/amd/perf/test_grok1_int4_perf.py rename to test/registered/amd/perf/mi30x/test_grok1_int4_perf.py index a2164a8f98ff..07c67c0da246 100644 --- a/test/registered/amd/perf/test_grok1_int4_perf.py +++ b/test/registered/amd/perf/mi30x/test_grok1_int4_perf.py @@ -119,6 +119,7 @@ def test_bench_grok1_int4(self): other_args=self.model_config["other_args"], variant=self.model_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/test_grok2_perf.py b/test/registered/amd/perf/mi30x/test_grok2_perf.py similarity index 98% rename from test/registered/amd/perf/test_grok2_perf.py rename to test/registered/amd/perf/mi30x/test_grok2_perf.py index e5b66b782f75..af089ff50edd 100644 --- a/test/registered/amd/perf/test_grok2_perf.py +++ b/test/registered/amd/perf/mi30x/test_grok2_perf.py @@ -121,6 +121,7 @@ def test_bench_grok2(self): other_args=self.model_config["other_args"], variant=self.model_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/test_text_models_perf_amd.py b/test/registered/amd/perf/mi30x/test_text_models_perf_amd.py similarity index 98% rename from test/registered/amd/perf/test_text_models_perf_amd.py rename to test/registered/amd/perf/mi30x/test_text_models_perf_amd.py index d03788ee2220..66b90a52fb89 100644 --- 
a/test/registered/amd/perf/test_text_models_perf_amd.py +++ b/test/registered/amd/perf/mi30x/test_text_models_perf_amd.py @@ -110,6 +110,7 @@ def test_bench_one_batch(self): input_lens=self.input_lens, output_lens=self.output_lens, other_args=other_args, + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/test_vlms_perf_amd.py b/test/registered/amd/perf/mi30x/test_vlms_perf_amd.py similarity index 98% rename from test/registered/amd/perf/test_vlms_perf_amd.py rename to test/registered/amd/perf/mi30x/test_vlms_perf_amd.py index 92f6a1fc58f1..fe638ae97f0b 100644 --- a/test/registered/amd/perf/test_vlms_perf_amd.py +++ b/test/registered/amd/perf/mi30x/test_vlms_perf_amd.py @@ -123,6 +123,7 @@ def test_bench_one_batch(self): output_lens=self.output_lens, other_args=other_args, extra_bench_args=extra_bench_args, + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py b/test/registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py index 01be06ebde8d..4530e2f4b4b1 100644 --- a/test/registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py +++ b/test/registered/amd/perf/mi35x/test_deepseek_r1_mxfp4_perf_mi35x.py @@ -152,6 +152,7 @@ def test_bench_one_batch(self): other_args=variant_config["other_args"], variant=variant_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/mi35x/test_deepseek_v32_basic_perf_mi35x.py b/test/registered/amd/perf/mi35x/test_deepseek_v32_basic_perf_mi35x.py index a9bfad3c9eb1..54abe22f390e 100644 --- a/test/registered/amd/perf/mi35x/test_deepseek_v32_basic_perf_mi35x.py +++ b/test/registered/amd/perf/mi35x/test_deepseek_v32_basic_perf_mi35x.py @@ -114,6 
+114,7 @@ def test_bench_one_batch(self): other_args=self.variant_config["other_args"], variant=self.variant_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/mi35x/test_deepseek_v32_mtp_perf_mi35x.py b/test/registered/amd/perf/mi35x/test_deepseek_v32_mtp_perf_mi35x.py index 39ec2cc187ba..6a0445126b0a 100644 --- a/test/registered/amd/perf/mi35x/test_deepseek_v32_mtp_perf_mi35x.py +++ b/test/registered/amd/perf/mi35x/test_deepseek_v32_mtp_perf_mi35x.py @@ -96,6 +96,7 @@ def _run_benchmark_with_timeout( profile_path_prefix, json_output_file, extra_args=bench_args, + enable_profile=False, # Disable profiling for AMD tests ) _, cmd_success = runner.run_benchmark_command(command, model_description) if not cmd_success: diff --git a/test/registered/amd/perf/mi35x/test_grok1_int4_perf_mi35x.py b/test/registered/amd/perf/mi35x/test_grok1_int4_perf_mi35x.py index 489d62eda915..0e23f7b739b6 100644 --- a/test/registered/amd/perf/mi35x/test_grok1_int4_perf_mi35x.py +++ b/test/registered/amd/perf/mi35x/test_grok1_int4_perf_mi35x.py @@ -112,6 +112,7 @@ def test_grok1_int4_perf(self): other_args=self.model_config["other_args"], variant=self.model_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) results = result_tuple[0] success = result_tuple[1] diff --git a/test/registered/amd/perf/mi35x/test_grok2_perf_mi35x.py b/test/registered/amd/perf/mi35x/test_grok2_perf_mi35x.py index 8e3ba7231b32..62dc28a00677 100644 --- a/test/registered/amd/perf/mi35x/test_grok2_perf_mi35x.py +++ b/test/registered/amd/perf/mi35x/test_grok2_perf_mi35x.py @@ -112,6 +112,7 @@ def test_grok2_perf(self): other_args=self.model_config["other_args"], variant=self.model_config["name"], extra_bench_args=["--trust-remote-code"], + enable_profile=False, # Disable profiling for AMD tests ) 
results = result_tuple[0] success = result_tuple[1]