Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ The following table lists the additional configuration options available in vLLM
| `kv_cache_dtype` | str | `None` | The KV cache dtype. It must be set when using the KV cache quantization method. Currently only int8 is supported. |
| `enable_shared_expert_dp` | bool | `False` | When enabled, the shared expert runs in DP, which gives better performance but consumes more memory. Currently only DeepSeek series models are supported. |
| `lmhead_tensor_parallel_size` | int | `None` | The custom tensor parallel size of lmhead. |
| `oproj_tensor_parallel_size` | int | `None` | The custom tensor parallel size of oproj. |

The details of each config option are as follows:

Expand Down
10 changes: 7 additions & 3 deletions tests/ut/distributed/test_parallel_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from vllm.config import ParallelConfig

from vllm_ascend.distributed.parallel_state import (
_LMTP, _MC2, destroy_ascend_model_parallel, get_lmhead_tp_group,
get_mc2_group, init_ascend_model_parallel)
_LMTP, _MC2, _OTP, destroy_ascend_model_parallel, get_lmhead_tp_group,
get_mc2_group, get_otp_group, init_ascend_model_parallel)


@pytest.fixture
Expand All @@ -29,16 +29,20 @@ def mock_distributed():
def test_init_ascend_model_parallel(mock_distributed, parallel_config):
mock_ascend_config = MagicMock()
mock_ascend_config.lmhead_tensor_parallel_size = 2
mock_ascend_config.oproj_tensor_parallel_size = 2
with patch('vllm_ascend.distributed.parallel_state.model_parallel_initialized', return_value=False), \
patch('vllm_ascend.distributed.parallel_state.init_model_parallel_group'), \
patch('vllm_ascend.distributed.parallel_state.get_ascend_config', return_value=mock_ascend_config):
init_ascend_model_parallel(parallel_config)

mc2_group = get_mc2_group()
assert mc2_group is not None
lmheadtp_group = get_lmhead_tp_group()
otp_group = get_otp_group()
assert mc2_group is not None
assert otp_group is not None
assert lmheadtp_group is not None

destroy_ascend_model_parallel()
assert _MC2 is None
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed completed

assert _LMTP is None
assert _OTP is None
1 change: 0 additions & 1 deletion tests/ut/models/test_deepseek_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ def test_row_parallel_linear(cls, mock_distributed):
linear = cls(input_size=128, output_size=64, bias=False, quant_config=None)
linear.quant_method = Mock()
linear.quant_method.apply.return_value = torch.randn(2, 4, 64)

input_ = torch.randn(2, 4, 128)
with patch("vllm_ascend.models.deepseek_v2.split_tensor_along_last_dim",
return_value=[torch.randn(2, 4, 64)]):
Expand Down
12 changes: 12 additions & 0 deletions tests/ut/models/test_qwen2_5_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,18 @@ def init_vision_transformer(
"vllm_ascend.models.qwen2_5_vl.parallel_state.get_tensor_model_parallel_world_size",
return_value=2,
)
mocker.patch(
"vllm_ascend.ops.linear.divide",
return_value=2,
)

mock_group = mocker.MagicMock()
mock_group.rank_in_group = 0
mock_group.world_size = 2
mocker.patch(
"vllm_ascend.ops.linear.get_tp_group",
return_value=mock_group,
)

vision_transformer = AscendQwen2_5_VisionTransformer(
vision_config,
Expand Down
Loading
Loading