Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tests/models/language/generation/test_hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tests.models.registry import HF_EXAMPLE_MODELS
from tests.utils import multi_gpu_test
from vllm.engine.arg_utils import EngineArgs
from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams
from vllm.v1.cudagraph_dispatcher import CudagraphDispatcher

Expand Down Expand Up @@ -577,6 +578,10 @@ def test_apc_multiple_prompts_all_cached_outputs(
model, max_model_len, tensor_parallel_size=tensor_parallel_size
)
vllm_runner_kwargs["mamba_ssm_cache_dtype"] = "float32"
# Reduce the effects of batch variance on ROCm since batch invariance is not
# yet supported. See: https://github.com/vllm-project/vllm/issues/27433
if current_platform.is_rocm():
vllm_runner_kwargs["max_num_seqs"] = 4
Comment on lines +581 to +584
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change strictly related to the bug fix from this PR?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in the sense that this PR is intended to fix test failures in the Language Models (Hybrid) test group on AMD ROCm. Without this change, this test consistently fails.


vllm_outputs_no_cache, _ = _get_vLLM_output(
vllm_runner, vllm_runner_kwargs, generated_prompts, max_tokens, num_logprobs
Expand Down
6 changes: 6 additions & 0 deletions vllm/model_executor/layers/mamba/mamba_mixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ def _ssm_transform(
time_step = self.dt_layernorm(time_step.contiguous())
B = self.b_layernorm(B.contiguous())
C = self.c_layernorm(C.contiguous())

# ROCm: tensor from split is non-contiguous, causing incorrect
# GEMM results in dt_proj.
if current_platform.is_rocm():
time_step = time_step.contiguous()

discrete_time_step = self.dt_proj(time_step)[0].transpose(-2, -1)
return discrete_time_step, B, C

Expand Down