Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/pr-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ on:
description: "Whether the PR must have the run-ci label"
type: boolean
default: true
require-run-ci-extra:
description: "Whether the PR must also have the run-ci-extra label (in addition to run-ci)"
type: boolean
default: false
cool-down-minutes:
description: "Cooldown period in minutes for low-permission users; 0 disables rate limiting"
type: number
Expand Down Expand Up @@ -41,6 +45,7 @@ jobs:
echo "PR Draft: ${{ steps.pr.outputs.draft }}"
echo "PR User: ${{ steps.pr.outputs.user }}"
echo "Require run-ci: ${{ inputs.require-run-ci }}"
echo "Require run-ci-extra: ${{ inputs.require-run-ci-extra }}"
echo "Cool down minutes: ${{ inputs.cool-down-minutes }}"
echo "==================="

Expand All @@ -59,6 +64,19 @@ jobs:
exit 1
fi

# Live-fetch label gate for pr-test-extra.yml. A workflow-level `if`
# check would read the frozen event payload, which does not update on
# rerun, so a rerun started by /tag-and-rerun-ci extra (whose slash
# handler adds the label and then reruns) would still see the old labels
# and stay skipped. A runtime step that checks labels fetched live
# through the gh API lets reruns pick up the new label set.
- name: Require run-ci-extra label (optional)
if: github.event_name == 'pull_request' && inputs.require-run-ci-extra == true
run: |
if [[ "${{ contains(fromJson(steps.pr.outputs.labels), 'run-ci-extra') }}" == "false" ]]; then
echo "Missing required label 'run-ci-extra'. Add the label (e.g. via /tag-and-rerun-ci extra) to opt this PR into the extra test workflow."
exit 1
fi

- name: Enforce rate limit for low-permission actors (optional)
if: github.event_name == 'pull_request' && inputs.cool-down-minutes > 0
uses: actions/github-script@v7
Expand Down
88 changes: 52 additions & 36 deletions .github/workflows/pr-test-extra.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
name: PR Test Extra
# Label-gated CI for nightly-class tests opted into a per-PR run.
# Label-gated extra CI workflow opted into per-PR via labels.
#
# Adds runtime to a PR only when the author asks for it: pull_request
# events bail unless the PR carries the `run-ci-extra` label. The same job
# graph runs unconditionally on workflow_dispatch / workflow_call so it
# Adds runtime to a PR only when the author asks for it: the PR must carry
# BOTH `run-ci` (basic-CI prerequisite) and `run-ci-extra` (explicit opt-in
# to this workflow). The label check happens at runtime in pr-gate.yml via
# live `gh pr view --json labels`, so reruns after adding the labels via
# slash command pick up the new label set (workflow-level `if` checks would
# read the frozen event payload, which never updates on rerun). The same
# job graph runs unconditionally on workflow_dispatch / workflow_call so it
# can be triggered manually or chained from another workflow.
#
# Stages: extra-a (1-/2-gpu) and extra-b (4-/8-gpu) caller stubs reuse
Expand Down Expand Up @@ -68,27 +72,24 @@ permissions:

jobs:
# =============================================== check changes ====================================================
# Label gate: pull_request events only proceed when the PR carries BOTH
# `run-ci` and `run-ci-extra` labels — `run-ci` is the basic-CI prerequisite
# (matching pr-test.yml's pr-gate `require-run-ci`) and `run-ci-extra` is the
# explicit opt-in to this workflow. Other event types
# (workflow_dispatch / workflow_call) always run. When this job is
# skipped by the gate, every downstream caller stub naturally skips
# because its needs do not resolve.
# The actual `run-ci` + `run-ci-extra` label gate now lives in the
# `call-gate` job below, which calls pr-gate.yml. pr-gate.yml live-fetches
# the PR's current labels at runtime, so a rerun (e.g. via
# /tag-and-rerun-ci extra, which adds labels using GITHUB_TOKEN; label
# additions made that way do not trigger a new `labeled` run) picks up
# the new label set and passes. The previous workflow-level `if` checked
# the frozen event payload, which never updates on rerun, so a run
# skipped for missing labels could not be recovered by adding them via
# the slash command.
#
# For `labeled` events we additionally require the just-added label to be
# one of the two gating labels — otherwise every unrelated label addition
# would dispatch a full CI run.
# We only keep one filter here: for `labeled` events, only proceed if the
# just-added label is one of the gating labels. This prevents every
# unrelated label addition from dispatching a full CI run.
check-changes:
if: |
github.event_name != 'pull_request' ||
(
(github.event.action != 'labeled' ||
github.event.label.name == 'run-ci' ||
github.event.label.name == 'run-ci-extra') &&
contains(github.event.pull_request.labels.*.name, 'run-ci') &&
contains(github.event.pull_request.labels.*.name, 'run-ci-extra')
)
github.event.action != 'labeled' ||
github.event.label.name == 'run-ci' ||
github.event.label.name == 'run-ci-extra'
uses: ./.github/workflows/_pr-test-check-changes.yml
with:
git_ref: ${{ inputs.git_ref || '' }}
Expand All @@ -97,11 +98,26 @@ jobs:
pr_test_yml: '.github/workflows/pr-test-extra.yml'
secrets: inherit

# =============================================== PR Gate ====================================================
# Runtime live-fetch gate. Mirrors pr-test.yml's call-gate, but additionally
# requires `run-ci-extra`. Failure here cascades down to every test job
# via `needs`, so the workflow ends with one red gate job (~30s) plus a
# row of skipped test jobs, and no test job occupies a runner.
call-gate:
needs: check-changes
if: github.event_name != 'schedule' && needs.check-changes.result == 'success'
uses: ./.github/workflows/pr-gate.yml
with:
require-run-ci: true
require-run-ci-extra: true
secrets: inherit

# =============================================== sgl-kernel ====================================================
sgl-kernel-build-wheels:
needs: check-changes
needs: [check-changes, call-gate]
if: |
needs.check-changes.result == 'success' &&
needs.call-gate.result == 'success' &&
needs.check-changes.outputs.sgl_kernel == 'true'
uses: ./.github/workflows/_pr-test-sgl-kernel-build.yml
with:
Expand All @@ -113,8 +129,8 @@ jobs:

# =============================================== extra-a (1-/2-gpu) ===============================================
extra-a-test-1-gpu-small:
needs: [check-changes, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' }}
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' && needs.call-gate.result == 'success' }}
uses: ./.github/workflows/_pr-test-stage.yml
with:
self_name: extra-a-test-1-gpu-small
Expand All @@ -126,8 +142,8 @@ jobs:
secrets: inherit

extra-a-test-1-gpu-large:
needs: [check-changes, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' }}
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' && needs.call-gate.result == 'success' }}
uses: ./.github/workflows/_pr-test-stage.yml
with:
self_name: extra-a-test-1-gpu-large
Expand All @@ -140,8 +156,8 @@ jobs:
secrets: inherit

extra-a-test-2-gpu-large:
needs: [check-changes, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' }}
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' && needs.call-gate.result == 'success' }}
uses: ./.github/workflows/_pr-test-stage.yml
with:
self_name: extra-a-test-2-gpu-large
Expand All @@ -154,8 +170,8 @@ jobs:

# =============================================== extra-b (4-/8-gpu) ===============================================
extra-b-test-4-gpu-h100:
needs: [check-changes, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' }}
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' && needs.call-gate.result == 'success' }}
uses: ./.github/workflows/_pr-test-stage.yml
with:
self_name: extra-b-test-4-gpu-h100
Expand All @@ -167,8 +183,8 @@ jobs:
secrets: inherit

extra-b-test-4-gpu-b200:
needs: [check-changes, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' }}
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' && needs.call-gate.result == 'success' }}
uses: ./.github/workflows/_pr-test-stage.yml
with:
self_name: extra-b-test-4-gpu-b200
Expand All @@ -181,8 +197,8 @@ jobs:
secrets: inherit

extra-b-test-8-gpu-h200:
needs: [check-changes, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' }}
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' && needs.call-gate.result == 'success' }}
uses: ./.github/workflows/_pr-test-stage.yml
with:
self_name: extra-b-test-8-gpu-h200
Expand All @@ -194,8 +210,8 @@ jobs:
secrets: inherit

extra-b-test-deepep-8-gpu-h200:
needs: [check-changes, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' }}
needs: [check-changes, call-gate, sgl-kernel-build-wheels]
if: ${{ !failure() && !cancelled() && needs.check-changes.result == 'success' && needs.call-gate.result == 'success' }}
uses: ./.github/workflows/_pr-test-stage.yml
with:
self_name: extra-b-test-deepep-8-gpu-h200
Expand Down
6 changes: 5 additions & 1 deletion python/sglang/srt/managers/schedule_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2451,10 +2451,14 @@ def prepare_for_decode(self):
)

def maybe_wait_verify_done(self):
# Use event.wait() (a stream-level wait) instead of .synchronize()
# (which blocks the CPU). Schedule-stream prep ops issued after this
# call are ordered after the forward-stream verify by the wait, and the
# CPU is not blocked. Later .cpu()/.item() calls sync the stream anyway.
if self.is_spec_v2:
draft_input: EagleDraftInput = self.spec_info
if draft_input.verify_done is not None:
draft_input.verify_done.synchronize()
draft_input.verify_done.wait()

def filter_batch(
self,
Expand Down
Loading