Merged
Commits
43 commits
5313205
[NPU] Support GLM-4.7-Flash on NPU (#153)
Estrella-xx Mar 27, 2026
70301c5
[NPU] recover accuracy for gemma3-4b-it from 54% to 72% (reduced by t…
McZyWu Mar 28, 2026
822138b
Br fix qwen2 5 ascend (#159)
amote-i Mar 28, 2026
2720785
add documentation for GLM-4.7-Flash on Ascend (#162)
Estrella-xx Mar 28, 2026
6c54ed0
Revert "Use LazyValue for routed_experts_weights_of_layer initializat…
longxin9715 Mar 28, 2026
2b186af
fix(grok): fallback to standard weight loading when no presharded fil…
Hide-on-bushsh Mar 30, 2026
34f3a2c
revert: revert qwen3_5.py, use separate layers (#172)
iridiumine Mar 30, 2026
8df71a8
[bugfix]GLM-4V model (#176)
Hide-on-bushsh Mar 30, 2026
6d57e7c
NPU can use piece cuda graph when the piece cuda graph is explicitly …
chx96642264 Mar 30, 2026
dbcfc31
Bug fix for llama eagle3 (#177)
khalil2ji3mp6 Mar 30, 2026
30aa792
fix eagle3 accept rate (#179)
heziiop Mar 30, 2026
841f4cd
Support MTP for Qwen3.5 (#154)
iridiumine Mar 30, 2026
230a528
fix bug (#181)
longxin9715 Mar 30, 2026
8de1b25
revert pr 19321 for accuracy temporarily (#178)
McZyWu Mar 31, 2026
e36779a
Bug fix for not import is npu (#182)
McZyWu Mar 31, 2026
cf463fb
Revert "Revert "Use LazyValue for routed_experts_weights_of_layer ini…
iridiumine Mar 31, 2026
b28eeff
fix: qwen3.5 precision & quant model load error (#191)
iridiumine Apr 1, 2026
2e7fbff
[NPU] change fused_qkvzba_split_reshape_cat_npu to fused_qkvzba_split…
iridiumine Apr 1, 2026
61c384a
[NPU] Use causal_conv1d and fix qwen-next modelslim (#207)
iridiumine Apr 2, 2026
8ec5072
merge sgl-project main (#221)
cen121212 Apr 3, 2026
5bf5c4a
BugFix for MLAPO for Deepseek eagle3 on Ascend (#222)
khalil2ji3mp6 Apr 3, 2026
bb5c386
adapt mtp + prefix for ascend gdn backend (#202)
silencejade Apr 6, 2026
c4366c1
Minimax 2.5 optimization (#237)
shadowxz109 Apr 7, 2026
fae90ab
Move ring test to nightly (#22267)
ispobock Apr 7, 2026
e7bc23c
[diffusion] CI: fix consistency check (#22251)
mickqian Apr 7, 2026
5ae00ec
[Disagg][NIXL] Support Mamba state slice transfer for heterogeneous T…
YAMY1234 Apr 7, 2026
727a182
[Mamba] eliminate D2H if tracking mamba states (#20522)
Henson-Zh-Ali Apr 7, 2026
ec5742f
fix: Auto-correct page_size for Mamba no_buffer radix cache mode (#20…
alphabetc1 Apr 7, 2026
be42fbb
Support HTTP2 server (#21700)
ispobock Apr 7, 2026
6131fb5
[NPU] enable mla prepare fused kernel only when being mla attn (#22024)
khalil2ji3mp6 Apr 7, 2026
0c204fb
[HiSparse] Optimize the scheduling of decode backup. (#21932)
huangtingwei9988 Apr 7, 2026
1a8eb89
Kernels community fa3 (#20796)
rainj-me Apr 7, 2026
cc35714
[tiny] migrate /get_server_info; print accept length in accuracy test…
hnyls2002 Apr 7, 2026
e148767
[AMD] Fix test_kimi_k25_mxfp4.py : stage-c-test-large-8-gpu-amd-mi35x…
yctseng0211 Apr 7, 2026
f08726f
[Feature] Add DFLASH speculative decoding support (#22077)
dcw02 Apr 7, 2026
671fe73
Reduce unnecessary kernels and copies in the NSA indexer (#22232)
1am9trash Apr 7, 2026
e665230
[CI] Update nightly test models for H200/B200 (#22288)
Kangyan-Zhou Apr 7, 2026
0e2a026
Add fast-fail to multimodal-gen CI (#22284)
hnyls2002 Apr 7, 2026
7546d04
[NVIDIA] Enable FP4 flashinfer trtllm routed moe (#21240)
trevor-m Apr 7, 2026
f6fc395
[CI] Migrate mgsm_en eval to gsm8k to remove openaipublic dependency …
dougyster Apr 7, 2026
dd73e9a
Revert "[CI] Update nightly test models for H200/B200 (#22288)" (#22297)
Kangyan-Zhou Apr 8, 2026
8c3d80e
Only upload CUDA coredumps on test failure (#22301)
hnyls2002 Apr 8, 2026
919df92
Merge branch 'release/PoC_20260331' of https://github.com/Ascend/sgla…
khalil2ji3mp6 Apr 8, 2026
54 changes: 41 additions & 13 deletions .github/workflows/diffusion-ci-gt-gen.yml
@@ -22,6 +22,10 @@ permissions:
contents: write
actions: read

env:
SGLANG_IS_IN_CI: true
SGLANG_CUDA_COREDUMP: "1"

jobs:
multimodal-diffusion-gen-1gpu:
if: github.repository == 'sgl-project/sglang'
@@ -40,6 +44,8 @@ jobs:
run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion

- name: Generate outputs
env:
RUNAI_STREAMER_MEMORY_LIMIT: 0
run: |
cd python
python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \
@@ -56,6 +62,11 @@ jobs:
path: python/diffusion-ci-outputs
retention-days: 7

- name: Publish GT images to sglang-bot/sglang-ci-data
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
run: python scripts/ci/utils/diffusion/publish_diffusion_gt.py --source-dir python/diffusion-ci-outputs

multimodal-diffusion-gen-2gpu:
if: github.repository == 'sgl-project/sglang'
runs-on: 2-gpu-h100
@@ -73,6 +84,8 @@ jobs:
run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion

- name: Generate outputs
env:
RUNAI_STREAMER_MEMORY_LIMIT: 0
run: |
cd python
python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \
@@ -89,27 +102,42 @@ jobs:
path: python/diffusion-ci-outputs
retention-days: 7

diffusion-ci-push:
needs: [multimodal-diffusion-gen-1gpu, multimodal-diffusion-gen-2gpu]
- name: Publish GT images to sglang-bot/sglang-ci-data
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
run: python scripts/ci/utils/diffusion/publish_diffusion_gt.py --source-dir python/diffusion-ci-outputs

multimodal-diffusion-gen-b200:
if: github.repository == 'sgl-project/sglang'
runs-on: ubuntu-latest
runs-on: 4-gpu-b200
timeout-minutes: 240
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Download artifacts
uses: actions/download-artifact@v4
with:
pattern: diffusion-gen-*
path: combined
merge-multiple: true
ref: ${{ inputs.ref || github.ref }}

- name: Install dependencies
run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion

- name: Collect image files
- name: Generate outputs
env:
RUNAI_STREAMER_MEMORY_LIMIT: 0
run: |
mkdir -p gt_images
find combined \( -name "*.png" -o -name "*.jpg" -o -name "*.jpeg" -o -name "*.webp" \) -type f -exec cp -f {} gt_images/ \;
cd python
python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \
--suite 1-gpu-b200 \
--out-dir ./diffusion-ci-outputs \
${{ inputs.case_ids != '' && format('--case-ids {0}', inputs.case_ids) || '' }}

- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: diffusion-gen-b200
path: python/diffusion-ci-outputs
retention-days: 7

- name: Publish GT images to sglang-bot/sglang-ci-data
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
run: python scripts/ci/utils/diffusion/publish_diffusion_gt.py --source-dir gt_images
run: python scripts/ci/utils/diffusion/publish_diffusion_gt.py --source-dir python/diffusion-ci-outputs
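For readability, the new b200 generation job can be assembled from the hunks in this file's diff as follows (a sketch: every value comes from the changed lines shown here, but indentation is normalized and unchanged context lines are filled in by analogy with the 1-gpu and 2-gpu jobs):

```yaml
# Assembled sketch of the new multimodal-diffusion-gen-b200 job.
# All names, paths, and secrets appear in the diff; layout is normalized.
multimodal-diffusion-gen-b200:
  if: github.repository == 'sgl-project/sglang'
  runs-on: 4-gpu-b200
  timeout-minutes: 240
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.ref || github.ref }}

    - name: Install dependencies
      run: bash scripts/ci/cuda/ci_install_dependency.sh diffusion

    - name: Generate outputs
      env:
        RUNAI_STREAMER_MEMORY_LIMIT: 0
      run: |
        cd python
        python -m sglang.multimodal_gen.test.scripts.gen_diffusion_ci_outputs \
          --suite 1-gpu-b200 \
          --out-dir ./diffusion-ci-outputs \
          ${{ inputs.case_ids != '' && format('--case-ids {0}', inputs.case_ids) || '' }}

    - name: Upload artifact
      uses: actions/upload-artifact@v4
      with:
        name: diffusion-gen-b200
        path: python/diffusion-ci-outputs
        retention-days: 7

    - name: Publish GT images to sglang-bot/sglang-ci-data
      env:
        GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_NIGHTLY_CI_DATA }}
      run: python scripts/ci/utils/diffusion/publish_diffusion_gt.py --source-dir python/diffusion-ci-outputs
```

Note that this replaces the old `diffusion-ci-push` aggregation job: each generation job now publishes its own outputs directly instead of collecting artifacts on `ubuntu-latest`.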
32 changes: 16 additions & 16 deletions .github/workflows/nightly-test-nvidia.yml
@@ -76,7 +76,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# JIT kernel full unit tests (expanded parameter ranges via SGLANG_JIT_KERNEL_RUN_FULL_TESTS)
nightly-test-kernel-1-gpu-h100:
@@ -110,7 +110,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-kernel-1-gpu --nightly --continue-on-error

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

nightly-test-kernel-8-gpu-h200:
if: github.repository == 'sgl-project/sglang' && (inputs.job_filter == '' || inputs.job_filter == 'all' || inputs.job_filter == 'nightly-test-kernel-8-gpu-h200')
@@ -140,7 +140,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-kernel-8-gpu-h200 --nightly --continue-on-error

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# General tests - 4 GPU H100
nightly-test-general-4-gpu-h100:
@@ -165,7 +165,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-4-gpu --nightly --continue-on-error

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# General tests - 8 GPU H200
nightly-test-general-8-gpu-h200:
@@ -249,7 +249,7 @@ jobs:
if-no-files-found: ignore

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.partition }}

@@ -280,7 +280,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-8-gpu-h20 --nightly --continue-on-error

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# General tests - 8 GPU B200
nightly-test-general-8-gpu-b200:
@@ -353,7 +353,7 @@ jobs:
if-no-files-found: ignore

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.partition }}

@@ -380,7 +380,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-eval-text-2-gpu --nightly --continue-on-error --timeout-per-file 4500

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# Text model performance tests
nightly-test-text-perf-2-gpu-h100:
@@ -418,7 +418,7 @@ jobs:
python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_text_models

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# VLM accuracy tests
nightly-test-vlm-accuracy-2-gpu-h100:
@@ -443,7 +443,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-eval-vlm-2-gpu --nightly --continue-on-error --timeout-per-file 9000

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# VLM performance tests
nightly-test-vlm-perf-2-gpu-h100:
@@ -481,7 +481,7 @@ jobs:
python3 scripts/ci/utils/publish_traces.py --traces-dir test/performance_profiles_vlms

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# diffusion performance tests
nightly-test-multimodal-server-1-gpu:
@@ -538,7 +538,7 @@ jobs:
if-no-files-found: ignore

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.part }}

@@ -596,7 +596,7 @@ jobs:
if-no-files-found: ignore

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.part }}

@@ -623,7 +623,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-4-gpu-b200 --nightly --continue-on-error --timeout-per-file 12000

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# Specialized B200 tests - 8 GPU, for specific backends and configs
nightly-test-specialized-8-gpu-b200:
Expand Down Expand Up @@ -652,7 +652,7 @@ jobs:
python3 run_suite.py --hw cuda --suite nightly-8-gpu-b200 --nightly --continue-on-error --timeout-per-file 2400

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# Diffusion cross-framework comparison
nightly-test-diffusion-comparison:
Expand Down Expand Up @@ -716,7 +716,7 @@ jobs:
if-no-files-found: ignore

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

# Consolidate performance metrics from all jobs
consolidate-metrics:
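The recurring change across these nightly jobs (and the PR-test jobs below) swaps the coredump-upload condition from `always()` to `failure()`, so the composite action runs only when a previous step in the job failed. A minimal sketch of the pattern, with an illustrative job name and runner label:

```yaml
# Sketch: upload CUDA coredumps only when a prior step in the job failed.
# The composite-action path matches the diff; the job name and runner
# label here are illustrative, not taken from any real workflow.
example-nightly-job:
  runs-on: 1-gpu-runner  # illustrative
  steps:
    - uses: actions/checkout@v4
    - name: Run suite
      run: python3 run_suite.py --hw cuda --suite nightly-1-gpu --nightly --continue-on-error
    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()  # was always(); now skipped when all prior steps succeed
```

Since `failure()` is true only when a previous step failed, the upload is also skipped on cancelled runs, which `always()` would have executed.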
7 changes: 4 additions & 3 deletions .github/workflows/pr-test-multimodal-gen.yml
@@ -100,7 +100,7 @@ jobs:
$CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.part }}

@@ -155,7 +155,7 @@ jobs:
$CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.part }}

@@ -175,6 +175,7 @@ jobs:
with:
ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

- uses: ./.github/actions/check-stage-health

- uses: ./.github/actions/check-maintenance

@@ -203,7 +204,7 @@ jobs:
$CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

multimodal-gen-unit-test:
if: |
24 changes: 12 additions & 12 deletions .github/workflows/pr-test.yml
@@ -602,7 +602,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-a-test-1-gpu-small $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

stage-a-test-cpu:
needs: [check-changes, call-gate]
@@ -711,7 +711,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-small --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.partition }}

@@ -767,7 +767,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.partition }}

@@ -822,7 +822,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-b-test-2-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.partition }}

@@ -880,7 +880,7 @@ jobs:
python3 -m pytest -q python/sglang/jit_kernel/tests/test_flash_attention_4.py

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

call-multimodal-gen-tests:
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
@@ -962,7 +962,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-h100 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.part }}

@@ -1030,7 +1030,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.part }}

@@ -1086,7 +1086,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.part }}

@@ -1148,7 +1148,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-c-test-deepep-4-gpu-h100 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

stage-c-test-deepep-8-gpu-h200:
needs: [check-changes, call-gate, wait-for-stage-b]
@@ -1209,7 +1209,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-c-test-deepep-8-gpu-h200 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

stage-c-test-4-gpu-b200:
needs: [check-changes, call-gate, wait-for-stage-b]
@@ -1262,7 +1262,7 @@ jobs:
python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG

- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()
with:
artifact-suffix: ${{ matrix.part }}

@@ -1316,7 +1316,7 @@ jobs:
# python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-gb200 --timeout-per-file 3600 $CONTINUE_ON_ERROR_FLAG
#
# - uses: ./.github/actions/upload-cuda-coredumps
# if: always()
# if: failure()

pr-test-finish:
needs:
4 changes: 2 additions & 2 deletions .github/workflows/rerun-test.yml
@@ -111,7 +111,7 @@ jobs:
echo "All $total test(s) passed in ${total_elapsed}s"
- uses: ./.github/actions/upload-cuda-coredumps
if: always()
if: failure()

rerun-test-cpu:
if: inputs.is_cpu == 'true'
@@ -173,4 +173,4 @@ jobs:
echo ""
done
total_elapsed=$(( SECONDS - suite_start ))
echo "All $total test(s) passed in ${total_elapsed}s"
echo "All $total test(s) passed in ${total_elapsed}s"