Patch summary (free text ahead of the first "diff --git" header; not part of any hunk).

Adds a nightly-8-gpu-mi35x-glm47-fp8-rocm720 job to the ROCm 7.2 AMD nightly
workflow: one new entry in the job dropdown options, the job definition itself,
and one new entry in the job-name list of a later ubuntu-latest job (presumably
its `needs:` list -- confirm against the workflow file). Also adds a new GSM8K
accuracy test module for GLM-4.7-FP8 registered under the
nightly-amd-8-gpu-mi35x-glm47-fp8 suite.

NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been flattened. The YAML indentation below and the two
`echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY` lines are
reconstructions; verify them against the repository before applying.

diff --git a/.github/workflows/nightly-test-amd-rocm720.yml b/.github/workflows/nightly-test-amd-rocm720.yml
index d38a4f10f7fc..2c0d3a120e5e 100644
--- a/.github/workflows/nightly-test-amd-rocm720.yml
+++ b/.github/workflows/nightly-test-amd-rocm720.yml
@@ -61,6 +61,7 @@ on:
           - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
           - nightly-8-gpu-mi35x-qwen35-rocm720
           - nightly-8-gpu-mi35x-glm5-rocm720
+          - nightly-8-gpu-mi35x-glm47-fp8-rocm720
           - nightly-8-gpu-mi35x-minimax-m25-rocm720
       job_filter:
         description: 'Or type comma-separated job names (overrides dropdown if non-empty)'
@@ -1272,6 +1273,39 @@ jobs:
           echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
           exit ${TEST_EXIT_CODE:-0}
 
+  # MI35x 8-GPU GLM-4.7-FP8 (Accuracy) ROCm 7.2
+  nightly-8-gpu-mi35x-glm47-fp8-rocm720:
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm47-fp8-rocm720,'))
+    runs-on: linux-mi35x-gpu-8
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.ref || github.ref }}
+
+      - name: Setup docker (ROCm 7.2)
+        run: |
+          touch github_summary.md
+          bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}
+
+      - name: Install dependencies
+        run: |
+          bash scripts/ci/amd/amd_ci_install_dependency.sh
+          # Install tabulate for run_suite.py (missing in MI35x container)
+          bash scripts/ci/amd/amd_ci_exec.sh pip install tabulate
+
+      - name: Accuracy Test MI35x ROCm 7.2 (8-GPU GLM-4.7-FP8)
+        timeout-minutes: 120
+        run: |
+          > github_summary.md  # Clear summary file
+          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
+            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
+            python3 run_suite.py --hw amd --suite nightly-amd-8-gpu-mi35x-glm47-fp8 --nightly --timeout-per-file 3600 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
+          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
+          exit ${TEST_EXIT_CODE:-0}
+
   # MI35x 8-GPU MiniMax-M2.5 (Accuracy) ROCm 7.2
   nightly-8-gpu-mi35x-minimax-m25-rocm720:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25-rocm720,'))
@@ -1382,6 +1416,7 @@ jobs:
       - nightly-8-gpu-mi35x-qwen3-235b-mxfp4-rocm720
       - nightly-8-gpu-mi35x-qwen35-rocm720
       - nightly-8-gpu-mi35x-glm5-rocm720
+      - nightly-8-gpu-mi35x-glm47-fp8-rocm720
       - nightly-8-gpu-mi35x-minimax-m25-rocm720
     runs-on: ubuntu-latest
     steps:
diff --git a/test/registered/amd/accuracy/mi35x/test_glm47_fp8_eval_mi35x.py b/test/registered/amd/accuracy/mi35x/test_glm47_fp8_eval_mi35x.py
new file mode 100644
index 000000000000..31c9ea41108c
--- /dev/null
+++ b/test/registered/amd/accuracy/mi35x/test_glm47_fp8_eval_mi35x.py
@@ -0,0 +1,61 @@
+"""MI35x GLM-4.7-FP8 GSM8K Accuracy Evaluation Test (8-GPU)
+
+Tests GLM-4.7-FP8 accuracy using GSM8K benchmark on MI35x.
+
+Registry: nightly-amd-8-gpu-mi35x-glm47-fp8 suite
+"""
+
+import os
+
+# Default the Hugging Face cache locations (set ahead of the sglang imports).
+os.environ.setdefault("HF_HOME", "/data2/models/huggingface")
+os.environ.setdefault("HF_HUB_CACHE", "/data2/models/huggingface/hub")
+
+import unittest
+
+from sglang.test.accuracy_test_runner import AccuracyTestParams
+from sglang.test.ci.ci_register import register_amd_ci
+from sglang.test.run_combined_tests import run_combined_tests
+from sglang.test.test_utils import ModelLaunchSettings
+
+# AMD CI registration: nightly suite entry with est_time=1800 (~30 min).
+register_amd_ci(
+    est_time=1800,
+    suite="nightly-amd-8-gpu-mi35x-glm47-fp8",
+    nightly=True,
+)
+
+GLM_4_7_FP8_MODEL_PATH = "zai-org/GLM-4.7-FP8"
+
+
+class TestGLM47FP8EvalMI35x(unittest.TestCase):
+    """GSM8K accuracy check for GLM-4.7-FP8 on MI35x."""
+
+    def test_glm_47_fp8(self):
+        """Run the GSM8K accuracy test for GLM-4.7-FP8 with one TP8 variant."""
+        # Extra server launch flags passed to the TP8 variant.
+        server_flags = [
+            "--trust-remote-code",
+            "--tool-call-parser=glm47",
+            "--reasoning-parser=glm45",
+        ]
+
+        # The only launch configuration exercised here (tp_size=8, "TP8").
+        tp8_settings = ModelLaunchSettings(
+            GLM_4_7_FP8_MODEL_PATH,
+            tp_size=8,
+            extra_args=server_flags,
+            variant="TP8",
+        )
+
+        # GSM8K evaluation parameters, carrying the 0.92 baseline accuracy.
+        gsm8k_params = AccuracyTestParams(dataset="gsm8k", baseline_accuracy=0.92)
+        run_combined_tests(
+            models=[tp8_settings],
+            test_name="GLM-4.7-FP8",
+            accuracy_params=gsm8k_params,
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()