Skip to content

Commit 2c59976

Browse files
committed
add test for contextPP4_genTP4
Signed-off-by: Chuang Zhu <[email protected]>
1 parent b688f08 commit 2c59976

File tree

3 files changed

+59
-0
lines changed

3 files changed

+59
-0
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
2+
hostname: localhost
3+
port: 8000
4+
backend: "pytorch"
5+
cuda_graph_config: null
6+
free_gpu_memory_fraction: 0.2
7+
context_servers:
8+
  num_instances: 1
9+
  max_batch_size: 1
10+
  max_num_tokens: 3000
11+
  max_seq_len: 4096
12+
  tensor_parallel_size: 1
13+
  pipeline_parallel_size: 4
14+
  kv_cache_config:
15+
    free_gpu_memory_fraction: 0.2
16+
    enable_partial_reuse: False
17+
  disable_overlap_scheduler: True
18+
  cache_transceiver_config:
19+
    backend: DEFAULT
20+
  urls:
21+
    - "localhost:8001"
22+
generation_servers:
23+
  num_instances: 1
24+
  tensor_parallel_size: 4
25+
  pipeline_parallel_size: 1
26+
  max_batch_size: 256
27+
  max_num_tokens: 4096
28+
  max_seq_len: 4096
29+
  kv_cache_config:
30+
    free_gpu_memory_fraction: 0.2
31+
    enable_partial_reuse: False
32+
  disable_overlap_scheduler: True
33+
  cache_transceiver_config:
34+
    backend: DEFAULT
35+
  urls:
36+
    - "localhost:8002"

tests/integration/defs/disaggregated/test_disaggregated.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ def get_test_config(test_desc, example_dir, test_root):
7676
(8, f"{test_configs_root}/disagg_config_ctxtp2pp2_gentp2pp2.yaml"),
7777
"ctxpp4_genpp4":
7878
(8, f"{test_configs_root}/disagg_config_ctxpp4_genpp4.yaml"),
79+
"ctxpp4_gentp4":
80+
(8, f"{test_configs_root}/disagg_config_ctxpp4_gentp4.yaml"),
7981
"deepseek_v3_lite_fp8_mpi":
8082
(4,
8183
f"{test_configs_root}/disagg_config_ctxtp2_gentp2_deepseek_v3_lite_mpi.yaml"
@@ -709,6 +711,26 @@ def test_disaggregated_ctxpp4_genpp4(disaggregated_test_root, llm_venv,
709711
cwd=llm_venv.get_working_directory())
710712

711713

714+
@pytest.mark.skip_less_device(4)
715+
@pytest.mark.parametrize("llama_model_root", ['TinyLlama-1.1B-Chat-v1.0'],
716+
indirect=True)
717+
def test_disaggregated_ctxpp4_gentp4(disaggregated_test_root, llm_venv,
718+
disaggregated_example_root,
719+
llama_model_root):
720+
src_dst_dict = {
721+
llama_model_root:
722+
f"{llm_venv.get_working_directory()}/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
723+
}
724+
for src, dst in src_dst_dict.items():
725+
if not os.path.islink(dst):
726+
os.makedirs(os.path.dirname(dst), exist_ok=True)
727+
os.symlink(src, dst, target_is_directory=True)
728+
run_disaggregated_test(disaggregated_example_root,
729+
"ctxpp4_gentp4",
730+
env=llm_venv._new_env,
731+
cwd=llm_venv.get_working_directory())
732+
733+
712734
@skip_no_hopper
713735
@pytest.mark.skip_less_device(4)
714736
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ l0_dgx_h100:
3333
- disaggregated/test_disaggregated.py::test_disaggregated_ctxpp2_genpp2[TinyLlama-1.1B-Chat-v1.0]
3434
- disaggregated/test_disaggregated.py::test_disaggregated_ctxtp2_genpp2[TinyLlama-1.1B-Chat-v1.0]
3535
- disaggregated/test_disaggregated.py::test_disaggregated_ctxpp2_gentp2[TinyLlama-1.1B-Chat-v1.0]
36+
- disaggregated/test_disaggregated.py::test_disaggregated_ctxpp4_gentp4[TinyLlama-1.1B-Chat-v1.0]
3637
- disaggregated/test_disaggregated.py::test_disaggregated_genbs1[TinyLlama-1.1B-Chat-v1.0]
3738
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False]
3839
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True]

0 commit comments

Comments
 (0)