SemiAnalysisAI · cquil11 · Dec 15, 2025 · Dec 10, 2025 · Dec 10, 2025 · Dec 11, 2025
diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k1k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:

diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 1k8k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:

diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml
@@ -2,8 +2,6 @@ name: "Full Sweep Scheduler - 8k1k"
 
 on:
     workflow_dispatch:
-    schedule:
-        - cron: "0 0 * * *"
 
 jobs:
     get-dsr1-configs:

diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
@@ -0,0 +1,233 @@
+# Benchmarks the configurations referenced by changes to perf-changelog.yaml.
+# Triggered by pushes to main and by pull requests targeting main.
+name: "Run Sweep"
+run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }}
+
+concurrency:
+    # One group per pull request, or per ref for push events.
+    group: sweep-${{ github.event.pull_request.number || github.ref }}
+    # Supersede stale pull-request runs, but never cancel a sweep started by a
+    # push: the next push only diffs its own before..after range, so the
+    # changelog entries of a cancelled run would not be benchmarked again.
+    cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+on:
+    push:
+        branches:
+            - main
+        paths:
+            - "perf-changelog.yaml"
+    pull_request:
+        branches:
+            - main
+        # 'labeled' is included so that adding the 'sweep-enabled' label
+        # (checked by the setup job) can start a run on its own.
+        types:
+            - ready_for_review
+            - synchronize
+            - labeled
+        paths:
+            - "perf-changelog.yaml"
+
+jobs:
+    # Runs utils/process_changelog.py over the perf-changelog.yaml changes
+    # between a base and a head revision and publishes its stdout as the
+    # 'search-space-config' output (a JSON string consumed by the jobs below).
+    setup:
+        runs-on: ubuntu-latest
+        # Pull requests: must be non-draft and carry the 'sweep-enabled' label.
+        # Any other event: skipped when the head commit message contains
+        # '[skip-sweep]'.
+        if: >-
+            (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) ||
+            (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]'))
+        outputs:
+            search-space-config: ${{ steps.setup.outputs.search-space-config }}
+        steps:
+            - name: Checkout code
+              uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+              with:
+                  # Full history so both revisions being compared are available.
+                  fetch-depth: 0
+
+            - id: setup
+              # Event fields are handed over through the environment instead of
+              # being interpolated into the script text, so their values are
+              # never parsed as shell code.
+              env:
+                  EVENT_NAME: ${{ github.event_name }}
+                  PR_BASE_BRANCH: ${{ github.base_ref }}
+                  PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+                  PUSH_BEFORE_SHA: ${{ github.event.before }}
+                  PUSH_AFTER_SHA: ${{ github.event.after }}
+              run: |
+                  pip install pydantic
+
+                  # Pull requests compare the target branch with the PR head;
+                  # pushes compare the commits before and after the push.
+                  if [ "$EVENT_NAME" == "pull_request" ]; then
+                      BASE_REF="origin/${PR_BASE_BRANCH}"
+                      HEAD_REF="${PR_HEAD_SHA}"
+                  else
+                      BASE_REF="${PUSH_BEFORE_SHA}"
+                      HEAD_REF="${PUSH_AFTER_SHA}"
+                  fi
+
+                  CONFIG_JSON=$(python3 "${GITHUB_WORKSPACE}/utils/process_changelog.py" \
+                      --changelog-file "${GITHUB_WORKSPACE}/perf-changelog.yaml" \
+                      --base-ref "$BASE_REF" \
+                      --head-ref "$HEAD_REF")
+
+                  # Delimited (heredoc) form: the value survives intact even if
+                  # the JSON spans several lines, which a plain name=value line
+                  # cannot represent.
+                  DELIMITER="SWEEP_CONFIG_${RANDOM}${RANDOM}"
+                  {
+                      echo "search-space-config<<${DELIMITER}"
+                      echo "$CONFIG_JSON"
+                      echo "${DELIMITER}"
+                  } >> "$GITHUB_OUTPUT"
+
+    # Calls benchmark-multinode-tmpl.yml once per entry of multi_node['1k1k']
+    # in the search-space config produced by the setup job.
+    sweep-multi-node-1k1k:
+        needs: setup
+        # The setup output is a JSON *string*: it has to go through fromJson
+        # before it can be indexed (indexing the raw string yields null, which
+        # made the old comparison always true). The job is skipped when the
+        # list is empty, because an empty matrix is an error.
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != '[]' }}
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 1k1k /
+        strategy:
+            # Let the remaining configurations finish when one of them fails.
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}
+        secrets: inherit
+        # Anchored so the 1k8k and 8k1k multi-node jobs reuse the same mapping.
+        with: &multi-node-inputs
+            isl: ${{ matrix.config.isl }}
+            osl: ${{ matrix.config.osl }}
+            max-model-len: ${{ matrix.config.max-model-len }}
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            exp-name: ${{ matrix.config.exp-name }}
+            # Serialised to a JSON string before being passed as an input.
+            conc-list: ${{ toJson(matrix.config.conc) }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+            # Settings read from the 'prefill' sub-object of the matrix entry.
+            prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
+            prefill-tp: ${{ matrix.config.prefill.tp }}
+            prefill-ep: ${{ matrix.config.prefill.ep }}
+            prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
+            prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
+
+            # Settings read from the 'decode' sub-object of the matrix entry.
+            decode-num-worker: ${{ matrix.config.decode.num-worker }}
+            decode-tp: ${{ matrix.config.decode.tp }}
+            decode-ep: ${{ matrix.config.decode.ep }}
+            decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
+            decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
+
+    # Calls benchmark-multinode-tmpl.yml once per entry of multi_node['1k8k'].
+    sweep-multi-node-1k8k:
+        needs: setup
+        # Parse the JSON string output before indexing it; indexing the raw
+        # string yields null, so the old check never detected an empty list.
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k']) != '[]' }}
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 1k8k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'] }}
+        secrets: inherit
+        # Same inputs mapping as the job that defines the multi-node-inputs anchor.
+        with: *multi-node-inputs
+
+    # Calls benchmark-multinode-tmpl.yml once per entry of multi_node['8k1k'].
+    sweep-multi-node-8k1k:
+        needs: setup
+        # Parse the JSON string output before indexing it; indexing the raw
+        # string yields null, so the old check never detected an empty list.
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != '[]' }}
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: multi-node 8k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }}
+        secrets: inherit
+        # Same inputs mapping as the job that defines the multi-node-inputs anchor.
+        with: *multi-node-inputs
+
+    # Calls benchmark-tmpl.yml once per entry of single_node['1k1k'] in the
+    # search-space config produced by the setup job.
+    sweep-single-node-1k1k:
+        needs: setup
+        # The setup output is a JSON *string*: it has to go through fromJson
+        # before it can be indexed (indexing the raw string yields null, which
+        # made the old comparison always true). The job is skipped when the
+        # list is empty, because an empty matrix is an error.
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 1k1k /
+        strategy:
+            # Let the remaining configurations finish when one of them fails.
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
+        secrets: inherit
+        # Anchored so the 1k8k and 8k1k single-node jobs reuse the same mapping.
+        with: &single-node-inputs
+            exp-name: ${{ matrix.config.exp-name }}
+            isl: ${{ matrix.config.isl }}
+            osl: ${{ matrix.config.osl }}
+            max-model-len: ${{ matrix.config.max-model-len }}
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            tp: ${{ matrix.config.tp }}
+            ep: ${{ matrix.config.ep }}
+            dp-attn: ${{ matrix.config.dp-attn }}
+            conc: ${{ matrix.config.conc }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+    # Calls benchmark-tmpl.yml once per entry of single_node['1k8k'].
+    sweep-single-node-1k8k:
+        needs: setup
+        # Parse the JSON string output before indexing it; indexing the raw
+        # string yields null, so the old check never detected an empty list.
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k8k']) != '[]' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 1k8k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'] }}
+        secrets: inherit
+        # Same inputs mapping as the job that defines the single-node-inputs anchor.
+        with: *single-node-inputs
+
+    # Calls benchmark-tmpl.yml once per entry of single_node['8k1k'].
+    sweep-single-node-8k1k:
+        needs: setup
+        # Parse the JSON string output before indexing it; indexing the raw
+        # string yields null, so the old check never detected an empty list.
+        if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: single-node 8k1k /
+        strategy:
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }}
+        secrets: inherit
+        # Same inputs mapping as the job that defines the single-node-inputs anchor.
+        with: *single-node-inputs
+
+    # Invokes the collect-results workflow once setup and all six sweep jobs
+    # have finished. always() lets it run whatever the sweep outcomes were;
+    # the extra check keeps it from running when setup itself was skipped.
+    collect-results:
+        needs:
+            - setup
+            - sweep-single-node-1k1k
+            - sweep-single-node-1k8k
+            - sweep-single-node-8k1k
+            - sweep-multi-node-1k1k
+            - sweep-multi-node-1k8k
+            - sweep-multi-node-8k1k
+        if: always() && needs.setup.result != 'skipped'
+        uses: ./.github/workflows/collect-results.yml
+        secrets: inherit
+
+    # Extracts the 'changelog_metadata' field from the setup job's JSON output
+    # and uploads it as the 'changelog-metadata' artifact.
+    upload-changelog-metadata:
+        needs: [setup, collect-results]
+        # A status function is required here: without one, the implicit
+        # success() check also fails when any job further up the dependency
+        # chain (the sweep jobs) failed or was skipped, which would skip this
+        # job even though collect-results ran. The direct dependencies must
+        # still have succeeded, and a cancelled run is still honoured.
+        if: ${{ !cancelled() && needs.setup.result == 'success' && needs.collect-results.result == 'success' }}
+        runs-on: ubuntu-latest
+        steps:
+            - name: Extract and save changelog metadata
+              # Passed through env so the JSON is never parsed as shell code.
+              env:
+                  CONFIG_JSON: ${{ needs.setup.outputs.search-space-config }}
+              run: |
+                  echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json
+
+            - name: Upload changelog artifact
+              uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+              with:
+                  name: changelog-metadata
+                  path: changelog_metadata.json
+
+    # Downloads the results_* artifacts into RESULTS_DIR, runs
+    # utils/calc_success_rate.py with STATS_FILENAME as its argument, and
+    # uploads <STATS_FILENAME>.json (presumably written by that script) as the
+    # 'run-stats' artifact. Runs whenever collect-results was not skipped.
+    calc-success-rate:
+        needs: [collect-results]
+        if: always() && needs.collect-results.result != 'skipped'
+        runs-on: ubuntu-latest
+
+        # Job-wide environment, visible to every step below.
+        env:
+            RESULTS_DIR: results/
+            STATS_FILENAME: run_stats
+            GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
+
+        steps:
+            - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+              with:
+                  fetch-depth: 0
+                  token: ${{ secrets.REPO_PAT }}
+
+            - name: Download results artifacts
+              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              with:
+                  pattern: results_*
+                  path: ${{ env.RESULTS_DIR }}
+
+            - name: Install python dependencies
+              run: pip install PyGithub
+
+            - name: Calculate success rate
+              run: python3 utils/calc_success_rate.py $STATS_FILENAME
+
+            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+              with:
+                  name: run-stats
+                  path: ${{ env.STATS_FILENAME }}.json
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -0,0 +1,9 @@
+# One entry per change: the config keys it affects (glob patterns allowed, as
+# in the entry below) and a free-text description of what changed.
+- config-keys:
+    - "gptoss-fp4-*-trt"
+  description: |
+    - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1'
+    - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh
+    - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM
+    PR: https://github.com/InferenceMAX/InferenceMAX/pull/110
+
diff --git a/utils/constants.py b/utils/constants.py
@@ -0,0 +1,4 @@
+# Repository-relative paths shared by the utility scripts.
+
+# Master configuration files, one per hardware vendor.
+MASTER_CONFIGS = [
+    ".github/configs/amd-master.yaml",
+    ".github/configs/nvidia-master.yaml",
+]
+
+# Runner configuration file.
+RUNNER_CONFIG = ".github/configs/runners.yaml"
+
+# Script that generates the sweep configurations.
+GENERATE_SWEEPS_PY_SCRIPT = "utils/matrix_logic/generate_sweep_configs.py"