Refactor to add TypeBasedDispatcher to simplify dispatching #4401
Workflow file for this run
name: PR Test

on:
  push:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "test/**"
  pull_request:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "test/**"
  workflow_dispatch:
    inputs:
      version:
        description: "FlashInfer version"
        required: true
        type: choice
        default: 'release'
        options:
          - 'release'
          - 'nightly'

concurrency:
  group: pr-test-${{ github.ref }}
  cancel-in-progress: true
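# In addition to the push and pull_request triggers, the workflow can be started by hand
# through workflow_dispatch. A minimal sketch of a manual run with the GitHub CLI,
# selecting the nightly FlashInfer wheels (the workflow is referenced by its `name:` above):
#   gh workflow run "PR Test" -f version=nightly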
jobs:
  unit-test-frontend:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        env:
          FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
        run: |
          bash scripts/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 10
        run: |
          cd test/lang
          python3 run_suite.py --suite per-commit
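  # The FLASHINFER_REPO expression in each install step acts as a ternary: in GitHub
  # Actions expressions, `a && b || c` yields b when a is true (b is a non-empty string
  # here) and c otherwise. inputs.version is only populated on workflow_dispatch runs, so
  # push and pull_request events fall back to the release wheel index.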
  unit-test-backend-1-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    strategy:
      matrix:
        range: [0-6, 6-16, 16-23, 23-30, 30-38, 38-100]
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        env:
          FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
        run: |
          bash scripts/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 25
        run: |
          RANGE=${{ matrix.range }}
          range_begin=${RANGE%-*}
          range_end=${RANGE#*-}
          cd test/srt
          python3 run_suite.py --suite per-commit --range-begin ${range_begin} --range-end ${range_end}
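  # Each matrix entry above is a plain "begin-end" string that the run step splits with
  # bash parameter expansion; a standalone sketch of the same parsing:
  #   RANGE=16-23
  #   range_begin=${RANGE%-*}   # drop the shortest "-*" suffix  -> 16
  #   range_end=${RANGE#*-}     # drop the shortest "*-" prefix  -> 23
  # The shared boundaries (0-6, 6-16, 16-23, ...) suggest run_suite.py treats the slice as
  # half-open, i.e. [range_begin, range_end).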
  unit-test-backend-2-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        env:
          FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
        run: |
          bash scripts/ci_install_dependency.sh

      - name: Test data parallelism (DP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 test_data_parallelism.py

      - name: Test data parallelism attention (DP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 test_dp_attention.py

      - name: Test update weights from distributed
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 test_update_weights_from_distributed.py

      - name: Test expert parallelism (EP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 test_moe_ep.py
  performance-test-1-gpu-part-1:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        env:
          FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
        run: |
          bash scripts/ci_install_dependency.sh

      - name: Benchmark single latency
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_default

      - name: Benchmark online latency
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default

      - name: Benchmark offline throughput
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default

      - name: Benchmark offline throughput (Non-streaming, small batch size)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
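  # Each benchmark in the performance jobs is a single unittest case, so any one
  # measurement can be reproduced locally by running the same
  # `python3 -m unittest <module>.<class>.<test>` command shown in its step from test/srt.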
  performance-test-1-gpu-part-2:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        env:
          FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
        run: |
          bash scripts/ci_install_dependency.sh

      - name: Benchmark offline throughput (w/o RadixAttention)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache

      - name: Benchmark offline throughput (w/ Triton)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend

      - name: Benchmark offline throughput (w/ FP8)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
  performance-test-2-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        env:
          FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
        run: |
          bash scripts/ci_install_dependency.sh

      - name: Benchmark single latency (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_default

      - name: Benchmark offline throughput (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default

      - name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
  accuracy-test-1-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        env:
          FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
        run: |
          bash scripts/ci_install_dependency.sh
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate accuracy
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_eval_accuracy_large.py
  accuracy-test-2-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install dependencies
        env:
          FLASHINFER_REPO: ${{ inputs.version == 'nightly' && 'https://flashinfer.ai/whl/nightly/cu124/torch2.4/flashinfer' || 'https://flashinfer.ai/whl/cu124/torch2.4/flashinfer' }}
        run: |
          bash scripts/ci_install_dependency.sh
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate accuracy (TP=2)
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_moe_eval_accuracy_large.py
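  # The finish job below fans in all other jobs via `needs`, so a single status check
  # ("finish") can stand in for the whole matrix, e.g. as the one required check on PRs.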
  finish:
    needs: [
      unit-test-frontend, unit-test-backend-1-gpu, unit-test-backend-2-gpu,
      performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
      accuracy-test-1-gpu, accuracy-test-2-gpu
    ]
    runs-on: ubuntu-latest
    steps:
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."