-
Notifications
You must be signed in to change notification settings - Fork 179
feat: performance changelog triggered runs (as opposed to nightly) #267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
433f2ef
dd4682b
7d6e052
ce49098
e6f6fe9
b87eedd
ba0b115
747bc2d
ca24b8e
954ebd6
ee346b3
ab6f948
27074d2
763b394
d0b2de7
41341ad
f131962
dfeba21
fd07f40
228e0a2
cb2cc8a
055b324
ae65551
667d2e1
ef3ba6b
fae8278
2018ad3
5e0c779
8d8ffa1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,233 @@ | ||
| # Workflow display name. The run name shows the pull request title when the event carries one, | ||
| # otherwise the name of the ref that triggered the run. | ||
| name: "Run Sweep" | ||
| run-name: Run Sweep - ${{ github.event.pull_request.title || github.ref_name }} | ||
|
|
||
| # One concurrency group per pull request number (or per ref when the event has no pull request); | ||
| # a newer run in the same group cancels the run that is still in progress. | ||
| concurrency: | ||
| group: sweep-${{ github.event.pull_request.number || github.ref }} | ||
| cancel-in-progress: true | ||
|
|
||
| # Triggers: pushes to main, and pull requests targeting main on the ready_for_review, | ||
| # synchronize and labeled activity types. Both are limited to changes touching perf-changelog.yaml. | ||
| on: | ||
| push: | ||
| branches: | ||
| - main | ||
| paths: | ||
| - "perf-changelog.yaml" | ||
| pull_request: | ||
| branches: | ||
| - main | ||
| types: | ||
| - ready_for_review | ||
| - synchronize | ||
| - labeled | ||
| paths: | ||
| - "perf-changelog.yaml" | ||
|
|
||
| jobs: | ||
| # Produces the `search-space-config` output consumed by the sweep jobs by running | ||
| # utils/process_changelog.py on perf-changelog.yaml between a base ref and a head ref. | ||
| # Pull request runs require a non-draft PR that carries the 'sweep-enabled' label; any other | ||
| # event runs unless the head commit message contains '[skip-sweep]'. | ||
| setup: | ||
| runs-on: ubuntu-latest | ||
| if: >- | ||
| (github.event_name == 'pull_request' && !github.event.pull_request.draft && contains(github.event.pull_request.labels.*.name, 'sweep-enabled')) || | ||
| (github.event_name != 'pull_request' && !contains(github.event.head_commit.message, '[skip-sweep]')) | ||
| outputs: | ||
| search-space-config: ${{ steps.setup.outputs.search-space-config }} | ||
| steps: | ||
| - name: Checkout code | ||
| uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 | ||
| with: | ||
| fetch-depth: 0 | ||
|
|
||
| # For pull requests the compared range is origin/<base_ref> .. PR head SHA; for other events it is | ||
| # github.event.before .. github.event.after. The script's stdout is written to the step output. | ||
| # NOTE(review): the output is written with a single `name=value` line, which presumably relies on | ||
| # the script printing single-line JSON - confirm against utils/process_changelog.py. | ||
| - id: setup | ||
| run: | | ||
| pip install pydantic | ||
|
|
||
| if [ "${{ github.event_name }}" == "pull_request" ]; then | ||
| BASE_REF="origin/${{ github.base_ref }}" | ||
| HEAD_REF="${{ github.event.pull_request.head.sha }}" | ||
| else | ||
| BASE_REF="${{ github.event.before }}" | ||
| HEAD_REF="${{ github.event.after }}" | ||
| fi | ||
|
|
||
| CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/process_changelog.py \ | ||
| --changelog-file ${GITHUB_WORKSPACE}/perf-changelog.yaml \ | ||
| --base-ref "$BASE_REF" \ | ||
| --head-ref "$HEAD_REF") | ||
|
|
||
| echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT | ||
|
|
||
| # Multi-node sweep over the `multi_node['1k1k']` list of the search-space config from `setup`; | ||
| # one matrix entry per config, each passed to the multi-node benchmark template. | ||
| # Job outputs are strings, so the JSON is parsed with fromJson() before it is indexed, and the | ||
| # selected list is serialized again with toJson() so it can be compared with '[]' (empty list). | ||
| sweep-multi-node-1k1k: | ||
| needs: setup | ||
| if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != '[]' }} | ||
|
chunfangamd marked this conversation as resolved.
Outdated
|
||
| uses: ./.github/workflows/benchmark-multinode-tmpl.yml | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The sweep jobs gate on Useful? React with 👍 / 👎. |
||
| name: multi-node 1k1k / | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }} | ||
| secrets: inherit | ||
| # Anchored so the other multi-node sweep jobs can reuse the same inputs via *multi-node-inputs. | ||
| with: &multi-node-inputs | ||
| isl: ${{ matrix.config.isl }} | ||
| osl: ${{ matrix.config.osl }} | ||
| max-model-len: ${{ matrix.config.max-model-len }} | ||
| runner: ${{ matrix.config.runner }} | ||
| image: ${{ matrix.config.image }} | ||
| model: ${{ matrix.config.model }} | ||
| model-prefix: ${{ matrix.config.model-prefix }} | ||
| framework: ${{ matrix.config.framework }} | ||
| precision: ${{ matrix.config.precision }} | ||
| exp-name: ${{ matrix.config.exp-name }} | ||
| conc-list: ${{ toJson(matrix.config.conc) }} | ||
| spec-decoding: ${{ matrix.config.spec-decoding }} | ||
| disagg: ${{ matrix.config.disagg }} | ||
|
|
||
| prefill-num-worker: ${{ matrix.config.prefill.num-worker }} | ||
| prefill-tp: ${{ matrix.config.prefill.tp }} | ||
| prefill-ep: ${{ matrix.config.prefill.ep }} | ||
| prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }} | ||
| prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }} | ||
|
|
||
| decode-num-worker: ${{ matrix.config.decode.num-worker }} | ||
| decode-tp: ${{ matrix.config.decode.tp }} | ||
| decode-ep: ${{ matrix.config.decode.ep }} | ||
| decode-dp-attn: ${{ matrix.config.decode.dp-attn }} | ||
| decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }} | ||
|
|
||
| # Multi-node sweep over the `multi_node['1k8k']` list of the search-space config from `setup`; | ||
| # inputs are shared with the other multi-node jobs through the *multi-node-inputs alias. | ||
| # Job outputs are strings, so the JSON is parsed with fromJson() before it is indexed, and the | ||
| # selected list is serialized again with toJson() so it can be compared with '[]' (empty list). | ||
| sweep-multi-node-1k8k: | ||
| needs: setup | ||
| if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k']) != '[]' }} | ||
| uses: ./.github/workflows/benchmark-multinode-tmpl.yml | ||
| name: multi-node 1k8k / | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k8k'] }} | ||
| secrets: inherit | ||
| with: *multi-node-inputs | ||
|
|
||
| # Multi-node sweep over the `multi_node['8k1k']` list of the search-space config from `setup`; | ||
| # inputs are shared with the other multi-node jobs through the *multi-node-inputs alias. | ||
| # Job outputs are strings, so the JSON is parsed with fromJson() before it is indexed, and the | ||
| # selected list is serialized again with toJson() so it can be compared with '[]' (empty list). | ||
| sweep-multi-node-8k1k: | ||
| needs: setup | ||
| if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != '[]' }} | ||
| uses: ./.github/workflows/benchmark-multinode-tmpl.yml | ||
| name: multi-node 8k1k / | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }} | ||
| secrets: inherit | ||
| with: *multi-node-inputs | ||
|
|
||
| # Single-node sweep over the `single_node['1k1k']` list of the search-space config from `setup`; | ||
| # one matrix entry per config, each passed to the single-node benchmark template. | ||
| # Job outputs are strings, so the JSON is parsed with fromJson() before it is indexed, and the | ||
| # selected list is serialized again with toJson() so it can be compared with '[]' (empty list). | ||
| sweep-single-node-1k1k: | ||
| needs: setup | ||
| if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]' }} | ||
| uses: ./.github/workflows/benchmark-tmpl.yml | ||
| name: single-node 1k1k / | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }} | ||
| secrets: inherit | ||
| # Anchored so the other single-node sweep jobs can reuse the same inputs via *single-node-inputs. | ||
| with: &single-node-inputs | ||
| exp-name: ${{ matrix.config.exp-name }} | ||
| isl: ${{ matrix.config.isl }} | ||
| osl: ${{ matrix.config.osl }} | ||
| max-model-len: ${{ matrix.config.max-model-len }} | ||
| runner: ${{ matrix.config.runner }} | ||
| image: ${{ matrix.config.image }} | ||
| model: ${{ matrix.config.model }} | ||
| model-prefix: ${{ matrix.config.model-prefix }} | ||
| framework: ${{ matrix.config.framework }} | ||
| precision: ${{ matrix.config.precision }} | ||
| tp: ${{ matrix.config.tp }} | ||
| ep: ${{ matrix.config.ep }} | ||
| dp-attn: ${{ matrix.config.dp-attn }} | ||
| conc: ${{ matrix.config.conc }} | ||
| spec-decoding: ${{ matrix.config.spec-decoding }} | ||
| disagg: ${{ matrix.config.disagg }} | ||
|
|
||
| # Single-node sweep over the `single_node['1k8k']` list of the search-space config from `setup`; | ||
| # inputs are shared with the other single-node jobs through the *single-node-inputs alias. | ||
| # Job outputs are strings, so the JSON is parsed with fromJson() before it is indexed, and the | ||
| # selected list is serialized again with toJson() so it can be compared with '[]' (empty list). | ||
| sweep-single-node-1k8k: | ||
| needs: setup | ||
| if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k8k']) != '[]' }} | ||
| uses: ./.github/workflows/benchmark-tmpl.yml | ||
| name: single-node 1k8k / | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k8k'] }} | ||
| secrets: inherit | ||
| with: *single-node-inputs | ||
|
|
||
| # Single-node sweep over the `single_node['8k1k']` list of the search-space config from `setup`; | ||
| # inputs are shared with the other single-node jobs through the *single-node-inputs alias. | ||
| # Job outputs are strings, so the JSON is parsed with fromJson() before it is indexed, and the | ||
| # selected list is serialized again with toJson() so it can be compared with '[]' (empty list). | ||
| sweep-single-node-8k1k: | ||
| needs: setup | ||
| if: ${{ toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]' }} | ||
| uses: ./.github/workflows/benchmark-tmpl.yml | ||
| name: single-node 8k1k / | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }} | ||
| secrets: inherit | ||
| with: *single-node-inputs | ||
|
|
||
| # Calls the collect-results reusable workflow once all six sweep jobs and setup have finished. | ||
| # always() keeps it running when sweep jobs failed or were skipped; it is only suppressed when | ||
| # setup itself was skipped. | ||
| collect-results: | ||
| needs: | ||
| [ | ||
| sweep-single-node-1k1k, | ||
| sweep-single-node-1k8k, | ||
| sweep-single-node-8k1k, | ||
| sweep-multi-node-1k1k, | ||
| sweep-multi-node-1k8k, | ||
| sweep-multi-node-8k1k, | ||
| setup, | ||
| ] | ||
| if: ${{ always() && needs.setup.result != 'skipped' }} | ||
| uses: ./.github/workflows/collect-results.yml | ||
| secrets: inherit | ||
|
|
||
| # Extracts the `.changelog_metadata` field from the setup output with jq and uploads it as the | ||
| # `changelog-metadata` artifact. | ||
| # NOTE(review): unlike collect-results this condition has no always(), so the implicit success() | ||
| # check applies - confirm the job is not skipped when a sweep job upstream of collect-results is skipped. | ||
| upload-changelog-metadata: | ||
| needs: [setup, collect-results] | ||
| if: ${{ needs.setup.result != 'skipped' }} | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| - name: Extract and save changelog metadata | ||
| env: | ||
| CONFIG_JSON: ${{ needs.setup.outputs.search-space-config }} | ||
| run: | | ||
| echo "$CONFIG_JSON" | jq '.changelog_metadata' > changelog_metadata.json | ||
|
|
||
| - name: Upload changelog artifact | ||
| uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 | ||
| with: | ||
| name: changelog-metadata | ||
| path: changelog_metadata.json | ||
|
|
||
| # Runs after collect-results (unless that job was skipped): downloads the results_* artifacts into | ||
| # RESULTS_DIR, runs utils/calc_success_rate.py with STATS_FILENAME as its argument, and uploads | ||
| # <STATS_FILENAME>.json (presumably written by that script - confirm) as the `run-stats` artifact. | ||
| calc-success-rate: | ||
| needs: collect-results | ||
| if: ${{ always() && needs.collect-results.result != 'skipped'}} | ||
| runs-on: ubuntu-latest | ||
|
|
||
| # REPO_PAT is exposed as GITHUB_TOKEN to every step and is also used for the checkout below. | ||
| env: | ||
| RESULTS_DIR: "results/" | ||
| STATS_FILENAME: "run_stats" | ||
| GITHUB_TOKEN: ${{ secrets.REPO_PAT }} | ||
|
|
||
| steps: | ||
| - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 | ||
| with: | ||
| token: ${{ secrets.REPO_PAT }} | ||
| fetch-depth: 0 | ||
|
|
||
| - name: Download results artifacts | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0 | ||
| with: | ||
| path: ${{ env.RESULTS_DIR }} | ||
| pattern: results_* | ||
|
|
||
| - name: Install python dependencies | ||
| run: pip install PyGithub | ||
|
|
||
| - name: Calculate success rate | ||
| run: python3 utils/calc_success_rate.py $STATS_FILENAME | ||
|
|
||
| - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 | ||
| with: | ||
| name: "run-stats" | ||
| path: ${{ env.STATS_FILENAME }}.json | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| # Changelog entry: `config-keys` lists the affected configs, `description` summarizes the changes, | ||
| # and `PR` links the pull request. | ||
| # NOTE(review): the `*` in a config key is presumably a wildcard over config names - confirm | ||
| # against utils/process_changelog.py. | ||
| - config-keys: | ||
| - gptoss-fp4-*-trt | ||
| description: | | ||
| - Upgrade GPT-OSS TRT images from 'release:1.1.0rc2.post2' to '1.2.0rc0.post1' | ||
| - Add NCCL_GRAPH_REGISTER=0 to benchmarks/gptoss_fp4_b200_trt_slurm.sh | ||
| - Change kv_cache_config.dtype from 'auto' to 'fp8' in benchmarks/gptoss_fp4_b200_trt_slurm.sh | ||
| - Remove MOE_BACKEND=CUTLASS, now just defaults to TRTLLM | ||
| PR: https://github.com/InferenceMAX/InferenceMAX/pull/110 | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| # Repository-relative paths: the AMD and NVIDIA master config files, the runner config file, | ||
| # and the script that generates sweep configs. | ||
| MASTER_CONFIGS = [".github/configs/amd-master.yaml", | ||
| ".github/configs/nvidia-master.yaml"] | ||
| RUNNER_CONFIG = ".github/configs/runners.yaml" | ||
| GENERATE_SWEEPS_PY_SCRIPT = "utils/matrix_logic/generate_sweep_configs.py" |
Uh oh!
There was an error while loading. Please reload this page.