Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -687,10 +687,6 @@ jobs:
run: |
source /etc/profile.d/sglang-ci.sh
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
pip install "setuptools==70.0.0"
git clone https://github.com/merrymercy/human-eval.git
cd human-eval
pip install -e . --no-build-isolation

- name: Run test
timeout-minutes: 30
Expand Down Expand Up @@ -802,10 +798,6 @@ jobs:
timeout-minutes: 20
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
pip install "setuptools==70.0.0"
git clone https://github.com/merrymercy/human-eval.git
cd human-eval
pip install -e . --no-build-isolation

- name: Run test
timeout-minutes: 30
Expand Down
7 changes: 7 additions & 0 deletions scripts/ci/cuda/ci_install_dependency.sh
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,13 @@ mark_step_done "Fix other dependencies"
# can delete the .pth file without reliably recreating it (pip race condition).
$PIP_CMD install "nvidia-cutlass-dsl>=4.4.1" "nvidia-cutlass-dsl-libs-base>=4.4.1" --no-deps --force-reinstall $PIP_INSTALL_SUFFIX || true


# Install human-eval
pip install "setuptools==70.0.0"
git clone https://github.com/merrymercy/human-eval.git
cd human-eval
pip install -e . --no-build-isolation

# ------------------------------------------------------------------------------
# Prepare runner
# ------------------------------------------------------------------------------
Expand Down
8 changes: 3 additions & 5 deletions test/registered/distributed/test_data_parallelism.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
from sglang.test.kits.eval_accuracy_kit import MMLUMixin
from sglang.test.kits.eval_accuracy_kit import GSM8KMixin
from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
Expand All @@ -18,10 +18,8 @@
register_amd_ci(est_time=73, suite="stage-b-test-2-gpu-large-amd")


class TestDataParallelism(CustomTestCase, MMLUMixin):
mmlu_score_threshold = 0.65
mmlu_num_examples = 64
mmlu_num_threads = 32
class TestDataParallelism(CustomTestCase, GSM8KMixin):
gsm8k_accuracy_thres = 0.7

@classmethod
def setUpClass(cls):
Expand Down
8 changes: 4 additions & 4 deletions test/registered/distributed/test_dp_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.kits.ebnf_constrained_kit import EBNFConstrainedMixin
from sglang.test.kits.eval_accuracy_kit import MGSMEnMixin
from sglang.test.kits.eval_accuracy_kit import GSM8KMixin
from sglang.test.kits.json_constrained_kit import JSONConstrainedMixin
from sglang.test.kits.radix_cache_server_kit import run_radix_attention_test
from sglang.test.kits.regex_constrained_kit import RegexConstrainedMixin
Expand All @@ -30,16 +30,16 @@

class TestDPAttentionDP2TP2(
CustomTestCase,
MGSMEnMixin,
GSM8KMixin,
JSONConstrainedMixin,
EBNFConstrainedMixin,
RegexConstrainedMixin,
):
mgsm_en_score_threshold = 0.8
gsm8k_accuracy_thres = 0.6

@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_MODEL_NAME_FOR_TEST_MLA
cls.base_url = DEFAULT_URL_FOR_TEST
cls._env_override = envs.SGLANG_DISABLE_CONSECUTIVE_PREFILL_OVERLAP.override(
True
Expand Down
8 changes: 3 additions & 5 deletions test/registered/eval/test_moe_eval_accuracy_large.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
from sglang.test.kits.eval_accuracy_kit import HumanEvalMixin, MGSMEnMixin, MMLUMixin
from sglang.test.kits.eval_accuracy_kit import GSM8KMixin
from sglang.test.test_utils import (
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
Expand All @@ -22,10 +22,8 @@
register_amd_ci(est_time=500, suite="stage-b-test-2-gpu-large-amd")


class TestMoEEvalAccuracyLarge(CustomTestCase, MMLUMixin, HumanEvalMixin, MGSMEnMixin):
mmlu_score_threshold = 0.62
humaneval_score_threshold = 0.40
mgsm_en_score_threshold = 0.61
class TestMoEEvalAccuracyLarge(CustomTestCase, GSM8KMixin):
gsm8k_accuracy_thres = 0.6

@classmethod
def setUpClass(cls):
Expand Down
46 changes: 26 additions & 20 deletions test/registered/moe/test_moe_ep.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_cuda_ci
from sglang.test.run_eval import run_eval
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
DEFAULT_MODEL_NAME_FOR_TEST_MLA,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
Expand All @@ -18,7 +18,7 @@
class TestEp(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_MODEL_NAME_FOR_TEST_MLA
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,
Expand All @@ -37,23 +37,26 @@ def setUpClass(cls):
def tearDownClass(cls):
kill_process_tree(cls.process.pid)

def test_mgsm_en(self):
def test_gsm8k(self):
args = SimpleNamespace(
base_url=self.base_url,
model=self.model,
eval_name="mgsm_en",
num_examples=None,
num_threads=1024,
num_shots=5,
data_path=None,
num_questions=200,
max_new_tokens=512,
parallel=128,
host="http://127.0.0.1",
port=int(self.base_url.split(":")[-1]),
)
metrics = run_eval_few_shot_gsm8k(args)
print(metrics)

metrics = run_eval(args)
self.assertGreaterEqual(metrics["score"], 0.8)
self.assertGreater(metrics["accuracy"], 0.60)


class TestEpDeepGEMM(CustomTestCase):
@classmethod
def setUpClass(cls):
cls.model = DEFAULT_MLA_MODEL_NAME_FOR_TEST
cls.model = DEFAULT_MODEL_NAME_FOR_TEST_MLA
cls.base_url = DEFAULT_URL_FOR_TEST
cls.process = popen_launch_server(
cls.model,
Expand All @@ -76,17 +79,20 @@ def setUpClass(cls):
def tearDownClass(cls):
kill_process_tree(cls.process.pid)

def test_mgsm_en(self):
def test_gsm8k(self):
args = SimpleNamespace(
base_url=self.base_url,
model=self.model,
eval_name="mgsm_en",
num_examples=None,
num_threads=1024,
num_shots=5,
data_path=None,
num_questions=200,
max_new_tokens=512,
parallel=128,
host="http://127.0.0.1",
port=int(self.base_url.split(":")[-1]),
)
metrics = run_eval_few_shot_gsm8k(args)
print(metrics)

metrics = run_eval(args)
self.assertGreaterEqual(metrics["score"], 0.8)
self.assertGreater(metrics["accuracy"], 0.60)


if __name__ == "__main__":
Expand Down
Loading