Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/llm/doc_code/serve/multi_gpu/dp_basic_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ def _testing_build_dp_openai_app(builder_config, **kwargs):
engine_kwargs={
"data_parallel_size": 2, # Number of DP replicas
"tensor_parallel_size": 1, # TP size per replica
# Reduced for CI compatibility
"max_model_len": 1024,
"max_num_seqs": 32,
},
experimental_configs={
# This is a temporary required config. We will remove this in future versions.
Expand Down
10 changes: 8 additions & 2 deletions doc/source/llm/doc_code/serve/multi_gpu/dp_pd_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def _testing_build_dp_deployment(llm_config, **kwargs):
"kv_transfer_config": {
"kv_connector": "NixlConnector",
"kv_role": "kv_both",
}
},
# Reduced for CI compatibility
"max_model_len": 1024,
"max_num_seqs": 32,
},
experimental_configs={
"dp_size_per_node": 2,
Expand All @@ -83,7 +86,10 @@ def _testing_build_dp_deployment(llm_config, **kwargs):
"kv_transfer_config": {
"kv_connector": "NixlConnector",
"kv_role": "kv_both",
}
},
# Reduced for CI compatibility
"max_model_len": 1024,
"max_num_seqs": 32,
},
experimental_configs={
"dp_size_per_node": 2,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@
" model_source=\"unsloth/Llama-3.2-1B-Instruct\",\n",
" # vLLM engine config.\n",
" engine_kwargs=dict(\n",
" # Specify the guided decoding library to use. The default is \"xgrammar\".\n",
" # Specify the structured outputs backend to use. The default is \"xgrammar\".\n",
" # See https://docs.vllm.ai/en/latest/serving/engine_args.html\n",
" # for other available libraries.\n",
" guided_decoding_backend=\"xgrammar\",\n",
" # for other available backends.\n",
" structured_outputs_config={\"backend\": \"xgrammar\"},\n",
" # Older GPUs (e.g. T4) don't support bfloat16. You should remove\n",
" # this line if you're using later GPUs.\n",
" dtype=\"half\",\n",
Expand Down
2 changes: 1 addition & 1 deletion doc/source/ray-overview/examples/e2e-audio/README.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@
" \"enable_chunked_prefill\": True,\n",
" \"max_num_batched_tokens\": 1028,\n",
" \"max_model_len\": 4096,\n",
" \"guided_decoding_backend\": \"xgrammar\",\n",
" \"structured_outputs_config\": {\"backend\": \"xgrammar\"},\n",
" \"dtype\": torch.float16,\n",
" },\n",
" concurrency=3,\n",
Expand Down
2 changes: 1 addition & 1 deletion doc/source/ray-overview/examples/e2e-audio/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ judge_config = vLLMEngineProcessorConfig(
"enable_chunked_prefill": True,
"max_num_batched_tokens": 1028,
"max_model_len": 4096,
"guided_decoding_backend": "xgrammar",
"structured_outputs_config": {"backend": "xgrammar"},
"dtype": torch.float16,
},
concurrency=3,
Expand Down
5 changes: 3 additions & 2 deletions python/deplocks/base_deps/ray_base_deps_py3.10.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1060,8 +1060,9 @@ smart-open==6.2.0 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.10.lock
# -r docker/base-deps/requirements.in
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.10.lock
# azure-core
Expand Down
5 changes: 3 additions & 2 deletions python/deplocks/base_deps/ray_base_deps_py3.11.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1054,8 +1054,9 @@ smart-open==6.2.0 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.11.lock
# -r docker/base-deps/requirements.in
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.11.lock
# azure-core
Expand Down
5 changes: 3 additions & 2 deletions python/deplocks/base_deps/ray_base_deps_py3.12.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1054,8 +1054,9 @@ smart-open==6.2.0 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.12.lock
# -r docker/base-deps/requirements.in
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.12.lock
# azure-core
Expand Down
5 changes: 3 additions & 2 deletions python/deplocks/base_deps/ray_base_deps_py3.9.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1060,8 +1060,9 @@ smart-open==6.2.0 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.9.lock
# -r docker/base-deps/requirements.in
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.9.lock
# azure-core
Expand Down
18 changes: 9 additions & 9 deletions python/deplocks/base_extra/ray_base_extra_py3.10.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1685,9 +1685,9 @@ oauth2client==4.1.3 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.10.lock
# anyscale
packaging==23.0 \
--hash=sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2 \
--hash=sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97
packaging==24.2 \
--hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
--hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.10.lock
# anyscale
Expand Down Expand Up @@ -2179,9 +2179,9 @@ rfc3986-validator==0.1.1 \
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.10.lock
# jsonschema
# jupyter-events
rich==13.3.3 \
--hash=sha256:540c7d6d26a1178e8e8b37e9ba44573a3cd1464ff6348b99ee7061b95d1c6333 \
--hash=sha256:dc84400a9d842b3a9c5ff74addd8eb798d155f36c1c91303888e0a66850d2a15
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.10.lock
# anyscale
Expand Down Expand Up @@ -2483,9 +2483,9 @@ types-python-dateutil==2.9.0.20240316 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.10.lock
# arrow
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.10.lock
# anyscale
Expand Down
18 changes: 9 additions & 9 deletions python/deplocks/base_extra/ray_base_extra_py3.11.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1673,9 +1673,9 @@ oauth2client==4.1.3 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.11.lock
# anyscale
packaging==23.0 \
--hash=sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2 \
--hash=sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97
packaging==24.2 \
--hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
--hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.11.lock
# anyscale
Expand Down Expand Up @@ -2167,9 +2167,9 @@ rfc3986-validator==0.1.1 \
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.11.lock
# jsonschema
# jupyter-events
rich==13.3.3 \
--hash=sha256:540c7d6d26a1178e8e8b37e9ba44573a3cd1464ff6348b99ee7061b95d1c6333 \
--hash=sha256:dc84400a9d842b3a9c5ff74addd8eb798d155f36c1c91303888e0a66850d2a15
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.11.lock
# anyscale
Expand Down Expand Up @@ -2465,9 +2465,9 @@ types-python-dateutil==2.9.0.20240316 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.11.lock
# arrow
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.11.lock
# anyscale
Expand Down
18 changes: 9 additions & 9 deletions python/deplocks/base_extra/ray_base_extra_py3.12.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1673,9 +1673,9 @@ oauth2client==4.1.3 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.12.lock
# anyscale
packaging==23.0 \
--hash=sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2 \
--hash=sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97
packaging==24.2 \
--hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
--hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.12.lock
# anyscale
Expand Down Expand Up @@ -2167,9 +2167,9 @@ rfc3986-validator==0.1.1 \
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.12.lock
# jsonschema
# jupyter-events
rich==13.3.3 \
--hash=sha256:540c7d6d26a1178e8e8b37e9ba44573a3cd1464ff6348b99ee7061b95d1c6333 \
--hash=sha256:dc84400a9d842b3a9c5ff74addd8eb798d155f36c1c91303888e0a66850d2a15
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.12.lock
# anyscale
Expand Down Expand Up @@ -2436,9 +2436,9 @@ types-python-dateutil==2.9.0.20240316 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.12.lock
# arrow
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.12.lock
# anyscale
Expand Down
18 changes: 9 additions & 9 deletions python/deplocks/base_extra/ray_base_extra_py3.9.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1692,9 +1692,9 @@ oauth2client==4.1.3 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.9.lock
# anyscale
packaging==23.0 \
--hash=sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2 \
--hash=sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97
packaging==24.2 \
--hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \
--hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.9.lock
# anyscale
Expand Down Expand Up @@ -2186,9 +2186,9 @@ rfc3986-validator==0.1.1 \
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.9.lock
# jsonschema
# jupyter-events
rich==13.3.3 \
--hash=sha256:540c7d6d26a1178e8e8b37e9ba44573a3cd1464ff6348b99ee7061b95d1c6333 \
--hash=sha256:dc84400a9d842b3a9c5ff74addd8eb798d155f36c1c91303888e0a66850d2a15
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.9.lock
# anyscale
Expand Down Expand Up @@ -2490,9 +2490,9 @@ types-python-dateutil==2.9.0.20240316 \
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.9.lock
# arrow
typing-extensions==4.12.2 \
--hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
--hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
typing-extensions==4.15.0 \
--hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
--hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
# via
# -c release/ray_release/byod/ray_base_extra_testdeps_py3.9.lock
# anyscale
Expand Down
Loading