From 756a331548299d2d2172a9380265a89376acab62 Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Tue, 24 Feb 2026 18:34:51 -0600
Subject: [PATCH 01/10] Enabling some B200-specific tests on MI355.

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-amd.yaml                      | 131 ++++--------------
 .../configs/Qwen3-Next-FP8-EP2_MI355.yaml     |  11 ++
 tests/evals/gsm8k/configs/models-mi355.txt    |   5 +
 3 files changed, 44 insertions(+), 103 deletions(-)
 create mode 100644 tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml
 create mode 100644 tests/evals/gsm8k/configs/models-mi355.txt

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index ffdf4b83c0e2..6805c38f209f 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -463,17 +463,6 @@ steps:
     - pytest -v -s v1/determinism/test_batch_invariance.py
     - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py
 
-- label: V1 Test attention (B200) # 10min
-  timeout_in_minutes: 30
-  gpu: b200
-  source_file_dependencies:
-    - vllm/config/attention.py
-    - vllm/model_executor/layers/attention
-    - vllm/v1/attention
-    - tests/v1/attention
-  commands:
-    - pytest -v -s v1/attention
-
 - label: V1 Test others (CPU) # 5 mins
   mirror_hardwares: [amdexperimental, amdproduction, amdtentative]
   agent_pool: mi325_1
@@ -1124,48 +1113,6 @@ steps:
     # Whisper needs spawn method to avoid deadlock
     - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
 
-- label: Blackwell Test # 21 min
-  timeout_in_minutes: 30
-  working_dir: "/vllm-workspace/"
-  gpu: b200
-  # optional: true
-  source_file_dependencies:
-  - csrc/quantization/fp4/
-  - csrc/attention/mla/
-  - csrc/quantization/cutlass_w8a8/moe/
-  - vllm/model_executor/layers/fused_moe/cutlass_moe.py
-  - vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py
-  - vllm/model_executor/layers/fused_moe/flashinfer_a2a_prepare_finalize.py
-  - vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
-  - vllm/v1/attention/backends/flashinfer.py
-  - vllm/v1/attention/backends/mla/cutlass_mla.py
-  - vllm/v1/attention/backends/mla/flashinfer_mla.py
-  - vllm/v1/attention/selector.py
-  - vllm/platforms/cuda.py
-  commands:
-    - nvidia-smi
-    - python3 examples/offline_inference/basic/chat.py
-    # Attention
-    # num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353
-    - pytest -v -s tests/kernels/attention/test_attention_selector.py
-    - pytest -v -s tests/kernels/attention/test_flashinfer.py -k 'not num_heads2'
-    - pytest -v -s tests/kernels/attention/test_flashinfer_trtllm_attention.py
-    - pytest -v -s tests/kernels/attention/test_cutlass_mla_decode.py
-    - pytest -v -s tests/kernels/attention/test_flashinfer_mla_decode.py
-    # Quantization
-    - pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8'
-    - pytest -v -s tests/kernels/quantization/test_nvfp4_quant.py
-    - pytest -v -s tests/kernels/quantization/test_silu_mul_nvfp4_quant.py
-    - pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
-    - pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py
-    - pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
-    - pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py
-    - pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py
-    - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
-    - pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py
-    - pytest -v -s tests/kernels/moe/test_flashinfer.py
-    - pytest -v -s tests/kernels/moe/test_cutedsl_moe.py
-
 - label: Blackwell Fusion and Compile Tests # 30 min
   timeout_in_minutes: 40
   working_dir: "/vllm-workspace/"
@@ -1232,16 +1179,6 @@ steps:
   commands:
     - pytest -s -v tests/quantization/test_blackwell_moe.py
 
-- label: Blackwell LM Eval Small Models
-  timeout_in_minutes: 120
-  gpu: b200
-  optional: true # run on nightlies
-  source_file_dependencies:
-  - csrc/
-  - vllm/model_executor/layers/quantization
-  commands:
-  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt
-
 #####  1 GPU test  #####
 #####  multi gpus test  #####
 
@@ -1647,16 +1584,6 @@ steps:
   commands:
   - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
 
-- label: Qwen3-30B-A3B-FP8-block Accuracy (B200)
-  timeout_in_minutes: 60
-  gpu: b200
-  optional: true
-  num_gpus: 2
-  working_dir: "/vllm-workspace"
-  commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
-
-
 - label: Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy
   timeout_in_minutes: 60
   mirror_hardwares: [amdexperimental]
@@ -2052,19 +1979,6 @@ steps:
 
 # TODO: Add the "V1 Test attetion (MI300)" test group
 
-- label: V1 Test attention (H100) # 10min
-  mirror_hardwares: [amdexperimental]
-  agent_pool: mi355_1
-  timeout_in_minutes: 30
-  gpu: h100
-  source_file_dependencies:
-    - vllm/config/attention.py
-    - vllm/model_executor/layers/attention
-    - vllm/v1/attention
-    - tests/v1/attention
-  commands:
-    - pytest -v -s v1/attention
-
 - label: Batch Invariance Tests (H100) # 10min
   mirror_hardwares: [amdexperimental]
   agent_pool: mi355_1
@@ -2081,6 +1995,8 @@ steps:
     - pytest -v -s v1/determinism/test_rms_norm_batch_invariant.py
 
 - label: V1 Test attention (B200) # 10min
+  mirror_hardwares: [amdexperimental, amdmi355]
+  agent_pool: mi355_1
   timeout_in_minutes: 30
   gpu: b200
   source_file_dependencies:
@@ -2705,7 +2621,9 @@ steps:
     # Whisper needs spawn method to avoid deadlock
     - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper
 
-- label: Blackwell Test # 21 min
+- label: Blackwell Test (MI355) # 21 min
+  mirror_hardwares: [amdexperimental, amdmi355]
+  agent_pool: mi355_1
   timeout_in_minutes: 30
   working_dir: "/vllm-workspace/"
   gpu: b200
@@ -2724,7 +2642,7 @@ steps:
   - vllm/v1/attention/selector.py
   - vllm/platforms/cuda.py
   commands:
-    - nvidia-smi
+    # rocm-smi
     - python3 examples/offline_inference/basic/chat.py
     # Attention
     # num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353
@@ -2815,13 +2733,15 @@ steps:
 
 - label: Blackwell LM Eval Small Models
   timeout_in_minutes: 120
+  mirror_hardwares: [amdexperimental, amdproduction, amdmi355]
+  agent_pool: mi355_1
   gpu: b200
   optional: true # run on nightlies
   source_file_dependencies:
   - csrc/
   - vllm/model_executor/layers/quantization
   commands:
-  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt
+  - pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-mi355.txt
 
 #####  1 GPU test  #####
 #####  multi gpus test  #####
@@ -3198,18 +3118,9 @@ steps:
   commands:
   - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
 
-- label: Qwen3-30B-A3B-FP8-block Accuracy (H100)
-  mirror_hardwares: [amdexperimental, amdproduction]
-  agent_pool: mi355_4
-  timeout_in_minutes: 60
-  gpu: h100
-  optional: true
-  num_gpus: 4
-  working_dir: "/vllm-workspace"
-  commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020
-
-- label: Qwen3-30B-A3B-FP8-block Accuracy (B200)
+- label: Qwen3-30B-A3B-FP8-block Accuracy (B200/MI355)
+  mirror_hardwares: [amdexperimental, amdproduction, amdmi355]
+  agent_pool: mi325_2
   timeout_in_minutes: 60
   gpu: b200
   optional: true
@@ -3218,7 +3129,6 @@ steps:
   commands:
   - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
 
-
 - label: Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy
   timeout_in_minutes: 60
   mirror_hardwares: [amdexperimental]
@@ -3227,4 +3137,19 @@ steps:
   num_gpus: 4
   working_dir: "/vllm-workspace"
   commands:
-  - bash .buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh 0.8 1319 8040
\ No newline at end of file
+  - bash .buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh 0.8 1319 8040
+
+  - label: Attention Benchmarks Smoke Test (B200/MI355)
+  device: b200
+  mirror_hardwares: [amdexperimental, amdmi355]
+  agent_pool: mi355_2
+  num_gpus: 2
+  optional: true
+  working_dir: "/vllm-workspace/"
+  timeout_in_minutes: 10
+  source_file_dependencies:
+  - benchmarks/attention_benchmarks/
+  - vllm/v1/attention/
+  commands:
+  - python3 benchmarks/attention_benchmarks/benchmark.py --backends ROCM_ATTN ROCM_AITER_FA ROCM_AITER_UNIFIED_ATTN --batch-specs "8q1s1k" --repeats 1 --warmup-iters 1
+
diff --git a/tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml b/tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml
new file mode 100644
index 000000000000..b935aa27ee7d
--- /dev/null
+++ b/tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml
@@ -0,0 +1,11 @@
+model_name: "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
+accuracy_threshold: 0.85
+num_questions: 1319
+num_fewshot: 5
+server_args: >-
+  --max-model-len 4096
+  --tensor-parallel-size 2
+  --enable-expert-parallel
+  --async-scheduling
+env:
+  VLLM_USE_FLASHINFER_MOE_FP8: "0"
diff --git a/tests/evals/gsm8k/configs/models-mi355.txt b/tests/evals/gsm8k/configs/models-mi355.txt
new file mode 100644
index 000000000000..f1122008f597
--- /dev/null
+++ b/tests/evals/gsm8k/configs/models-mi355.txt
@@ -0,0 +1,5 @@
+Qwen3-0.6B-FP8.yaml
+Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
+Qwen1.5-MoE-W4A16-CT.yaml
+DeepSeek-V2-Lite-Instruct-FP8.yaml
+Qwen3-Next-FP8-EP2_MI355.yaml

From af9ebc6c89a82905ae3661c63a3d9780d9060436 Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Tue, 24 Feb 2026 18:53:14 -0600
Subject: [PATCH 02/10] Fix indentation of the Attention Benchmarks Smoke Test step.

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-amd.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 6805c38f209f..ed3a494a271d 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -3139,7 +3139,7 @@ steps:
   commands:
   - bash .buildkite/scripts/scheduled_integration_test/qwen3_next_mtp_async_eplb.sh 0.8 1319 8040
 
-  - label: Attention Benchmarks Smoke Test (B200/MI355)
+- label: Attention Benchmarks Smoke Test (B200/MI355)
   device: b200
   mirror_hardwares: [amdexperimental, amdmi355]
   agent_pool: mi355_2

From 147e2b8903b2434f31cb97ef70732d82d55c8822 Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Tue, 24 Feb 2026 20:04:16 -0600
Subject: [PATCH 03/10] Fixing a routing error.

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-amd.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index ed3a494a271d..fc93a1cdb844 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -3120,7 +3120,7 @@ steps:
 
 - label: Qwen3-30B-A3B-FP8-block Accuracy (B200/MI355)
   mirror_hardwares: [amdexperimental, amdproduction, amdmi355]
-  agent_pool: mi325_2
+  agent_pool: mi35dd5_2
   timeout_in_minutes: 60
   gpu: b200
   optional: true

From 7c8025d7caf63be906c8362dfc5c4229956a0618 Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Tue, 24 Feb 2026 20:06:51 -0600
Subject: [PATCH 04/10] Fix agent_pool typo (mi35dd5_2 -> mi355_2).

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-amd.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index fc93a1cdb844..159d192f6d0f 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -3120,7 +3120,7 @@ steps:
 
 - label: Qwen3-30B-A3B-FP8-block Accuracy (B200/MI355)
   mirror_hardwares: [amdexperimental, amdproduction, amdmi355]
-  agent_pool: mi35dd5_2
+  agent_pool: mi355_2
   timeout_in_minutes: 60
   gpu: b200
   optional: true

From 64bf37486808f16366d1b144b251ca4f5b719a1b Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Thu, 26 Feb 2026 11:54:27 -0600
Subject: [PATCH 05/10] Redirecting "Blackwell LM Eval Small Models" to mi355_2

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-amd.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 159d192f6d0f..85d477b6a70f 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -2734,7 +2734,7 @@ steps:
 - label: Blackwell LM Eval Small Models
   timeout_in_minutes: 120
   mirror_hardwares: [amdexperimental, amdproduction, amdmi355]
-  agent_pool: mi355_1
+  agent_pool: mi355_2
   gpu: b200
   optional: true # run on nightlies
   source_file_dependencies:

From b019c0918da4ed8ae960d921c36ab6d955ea143b Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Fri, 27 Feb 2026 11:56:37 -0600
Subject: [PATCH 06/10] Skipping incompatible FA3.

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 tests/kernels/attention/test_attention_selector.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py
index f021df56c05b..783afec3405c 100644
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -302,6 +302,10 @@ def test_invalid_backend():
         ("FLEX_ATTENTION", None, False),  # Flex does not support
     ],
 )
+@pytest.mark.skipif(
+    current_platform.is_rocm(),
+    reason="Attention backend FA3 is not supported on ROCm. This test can't succeed.",
+)
 def test_per_head_quant_scales_backend_selection(
     backend_name: str, flash_attn_version: int | None, should_succeed: bool
 ):

From beb26d5018b5cad071fc6083f2e758536e219eec Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Tue, 3 Mar 2026 15:52:11 -0600
Subject: [PATCH 07/10] Commenting out void tests.

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-amd.yaml | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 85d477b6a70f..a318d6484db5 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -2647,23 +2647,24 @@ steps:
     # Attention
     # num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353
     - pytest -v -s tests/kernels/attention/test_attention_selector.py
-    - pytest -v -s tests/kernels/attention/test_flashinfer.py -k 'not num_heads2'
-    - pytest -v -s tests/kernels/attention/test_flashinfer_trtllm_attention.py
-    - pytest -v -s tests/kernels/attention/test_cutlass_mla_decode.py
-    - pytest -v -s tests/kernels/attention/test_flashinfer_mla_decode.py
-    # Quantization
-    - pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8'
-    - pytest -v -s tests/kernels/quantization/test_nvfp4_quant.py
-    - pytest -v -s tests/kernels/quantization/test_silu_mul_nvfp4_quant.py
-    - pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
-    - pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py
-    - pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
-    - pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py
-    - pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py
-    - pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
-    - pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py
-    - pytest -v -s tests/kernels/moe/test_flashinfer.py
-    - pytest -v -s tests/kernels/moe/test_cutedsl_moe.py
+    
+    #- pytest -v -s tests/kernels/attention/test_flashinfer.py -k 'not num_heads2'
+    #- pytest -v -s tests/kernels/attention/test_flashinfer_trtllm_attention.py
+    #- pytest -v -s tests/kernels/attention/test_cutlass_mla_decode.py
+    #- pytest -v -s tests/kernels/attention/test_flashinfer_mla_decode.py
+    ## Quantization
+    #- pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8'
+    #- pytest -v -s tests/kernels/quantization/test_nvfp4_quant.py
+    #- pytest -v -s tests/kernels/quantization/test_silu_mul_nvfp4_quant.py
+    #- pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
+    #- pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py
+    #- pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
+    #- pytest -v -s tests/kernels/quantization/test_nvfp4_qutlass.py
+    #- pytest -v -s tests/kernels/quantization/test_mxfp4_qutlass.py
+    #- pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
+    #- pytest -v -s tests/kernels/moe/test_ocp_mx_moe.py
+    #- pytest -v -s tests/kernels/moe/test_flashinfer.py
+    #- pytest -v -s tests/kernels/moe/test_cutedsl_moe.py
 
 - label: Blackwell Fusion and Compile Tests # 30 min
   timeout_in_minutes: 40

From 3f3b696107ff429d9a432dd57b5ff75957a3b5dc Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Tue, 3 Mar 2026 19:38:07 -0600
Subject: [PATCH 08/10] removing empty line as per PR comment.

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-amd.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index a318d6484db5..25f9471a15b8 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -2646,8 +2646,7 @@ steps:
     - python3 examples/offline_inference/basic/chat.py
     # Attention
     # num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353
-    - pytest -v -s tests/kernels/attention/test_attention_selector.py
-    
+    - pytest -v -s tests/kernels/attention/test_attention_selector.py
     #- pytest -v -s tests/kernels/attention/test_flashinfer.py -k 'not num_heads2'
     #- pytest -v -s tests/kernels/attention/test_flashinfer_trtllm_attention.py
     #- pytest -v -s tests/kernels/attention/test_cutlass_mla_decode.py

From bce34859512b381697957a501d9b0507b49dd8b1 Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Tue, 3 Mar 2026 19:40:45 -0600
Subject: [PATCH 09/10] Removing non-AMD-specific env. var as per PR comment.

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml b/tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml
index b935aa27ee7d..302abf97b110 100644
--- a/tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml
+++ b/tests/evals/gsm8k/configs/Qwen3-Next-FP8-EP2_MI355.yaml
@@ -7,5 +7,3 @@ server_args: >-
   --tensor-parallel-size 2
   --enable-expert-parallel
   --async-scheduling
-env:
-  VLLM_USE_FLASHINFER_MOE_FP8: "0"

From 27f5d0ec868b02aa2f88cd5055d432e16d7c2d6d Mon Sep 17 00:00:00 2001
From: "Alexei V. Ivanov" <alexei.ivanov@amd.com>
Date: Wed, 4 Mar 2026 12:47:55 -0600
Subject: [PATCH 10/10] responding to comments

Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
---
 .buildkite/test-amd.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 8ead842e8096..b0a672682560 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -2797,7 +2797,7 @@ steps:
   - vllm/v1/attention/selector.py
   - vllm/platforms/cuda.py
   commands:
-    # rocm-smi
+    - rocm-smi
     - python3 examples/offline_inference/basic/chat.py
     # Attention
     # num_heads2 broken by https://github.com/flashinfer-ai/flashinfer/issues/1353
@@ -3288,6 +3288,7 @@ steps:
   commands:
   - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep_eplb.sh 0.8 200 8020 2 1
 
+
 - label: Qwen3-Next-80B-A3B-Instruct MTP Async EPLB Accuracy
   timeout_in_minutes: 60
   mirror_hardwares: [amdexperimental]