From ba5af5b99ce5ffef817487f6d10a7816d94fe0f0 Mon Sep 17 00:00:00 2001
From: mgoin
Date: Mon, 10 Nov 2025 22:32:30 +0000
Subject: [PATCH 1/3] Lazy import aiter rocm in fp8_utils.py

Signed-off-by: mgoin
---
 vllm/model_executor/layers/quantization/utils/fp8_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index 63726c07b7d1..364be5a808f1 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -12,7 +12,6 @@
 
 import vllm.envs as envs
 from vllm import _custom_ops as ops
-from vllm._aiter_ops import rocm_aiter_ops
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
@@ -313,6 +312,8 @@ def _run_aiter(
         weight: torch.Tensor,
         weight_scale: torch.Tensor,
         input_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
+        from vllm._aiter_ops import rocm_aiter_ops
+
         assert self.act_quant_group_shape == GroupShape(1, 128)
         n, k = weight.shape

From 64f25045db80f31b96dc121823fa19c289c3186f Mon Sep 17 00:00:00 2001
From: mgoin
Date: Mon, 10 Nov 2025 18:07:50 -0500
Subject: [PATCH 2/3] Update

Signed-off-by: mgoin
---
 vllm/_aiter_ops.py                                         | 4 ++--
 vllm/model_executor/layers/quantization/utils/fp8_utils.py | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/vllm/_aiter_ops.py b/vllm/_aiter_ops.py
index 9a4b5f3399be..3729ab689164 100644
--- a/vllm/_aiter_ops.py
+++ b/vllm/_aiter_ops.py
@@ -937,5 +937,5 @@ def shuffle_weights(
 
         return tuple(shuffle_weight(tensor, layout=layout) for tensor in tensors)
 
-
-rocm_aiter_ops.register_ops_once()
+if IS_AITER_FOUND:
+    rocm_aiter_ops.register_ops_once()

diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index 364be5a808f1..f41ca037939a 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -10,6 +10,7 @@
 import torch
+from vllm._aiter_ops import rocm_aiter_ops
 import vllm.envs as envs
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
@@ -312,8 +313,6 @@ def _run_aiter(
         weight: torch.Tensor,
         weight_scale: torch.Tensor,
         input_scale: torch.Tensor | None = None,
     ) -> torch.Tensor:
-        from vllm._aiter_ops import rocm_aiter_ops
-
         assert self.act_quant_group_shape == GroupShape(1, 128)
         n, k = weight.shape

From 5e900b2731dbd760b79f37a8ff7c4f9189fd2374 Mon Sep 17 00:00:00 2001
From: mgoin
Date: Mon, 10 Nov 2025 18:13:57 -0500
Subject: [PATCH 3/3] Fix

Signed-off-by: mgoin
---
 vllm/_aiter_ops.py                                         | 1 +
 vllm/model_executor/layers/quantization/utils/fp8_utils.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/_aiter_ops.py b/vllm/_aiter_ops.py
index 3729ab689164..8d35aa65738b 100644
--- a/vllm/_aiter_ops.py
+++ b/vllm/_aiter_ops.py
@@ -937,5 +937,6 @@ def shuffle_weights(
 
         return tuple(shuffle_weight(tensor, layout=layout) for tensor in tensors)
 
+
 if IS_AITER_FOUND:
     rocm_aiter_ops.register_ops_once()

diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index f41ca037939a..63726c07b7d1 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -10,9 +10,9 @@
 
 import torch
 
-from vllm._aiter_ops import rocm_aiter_ops
 import vllm.envs as envs
 from vllm import _custom_ops as ops
+from vllm._aiter_ops import rocm_aiter_ops
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
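
Note on the end state of this series: the lazy in-function import from PATCH 1/3 is
reverted, fp8_utils.py imports rocm_aiter_ops at the top level again (moved below
_custom_ops in PATCH 3/3 to satisfy import ordering), and the module-level side effect
in vllm/_aiter_ops.py, registering the aiter custom ops, is gated on availability of the
optional aiter package. A minimal sketch of that guard pattern follows; the
importlib-based probe and the class body are assumptions for illustration, since the
actual detection logic behind IS_AITER_FOUND is not shown in these diffs:

    import importlib.util

    # Assumption: availability is probed without importing the package, so merely
    # importing this module stays cheap on installs without ROCm aiter.
    IS_AITER_FOUND = importlib.util.find_spec("aiter") is not None


    class rocm_aiter_ops:
        """Namespace for aiter-backed ops (sketch; the real class lives in
        vllm/_aiter_ops.py)."""

        _registered = False

        @classmethod
        def register_ops_once(cls) -> None:
            # Idempotent registration: runs its body on the first call and
            # no-ops afterwards, mirroring the "once" in the real method name.
            if cls._registered:
                return
            cls._registered = True


    if IS_AITER_FOUND:
        # Register at import time only when aiter is actually installed,
        # which is the behavior PATCH 2/3 introduces and PATCH 3/3 keeps.
        rocm_aiter_ops.register_ops_once()

The guard fixes the failure mode of the pre-series code, where the unconditional
register_ops_once() call at module scope ran on every import, including on systems
where aiter does not exist.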