Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion requirements/cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ nvidia-cutlass-dsl[cu13]==4.5.0
quack-kernels>=0.3.3

# tokenspeed-mla: faster MLA with speculative decoding
tokenspeed-mla==0.1.2
tokenspeed-mla==0.1.2

# Humming kernels for quantized GEMM
humming-kernels[cu13]==0.1.0
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,8 @@ def _read_requirements(filename: str) -> list[str]:
if "nvidia-cutlass-dsl[cu13]" in req and cuda_major == "12":
# [cu13] extra is the default; strip it on CUDA 12 builds.
req = req.replace("nvidia-cutlass-dsl[cu13]", "nvidia-cutlass-dsl")
if "humming-kernels[cu13]" in req and cuda_major == "12":
req = req.replace("humming-kernels[cu13]", "humming-kernels[cu12]")
modified_requirements.append(req)
requirements = modified_requirements
elif _is_hip():
Expand Down
23 changes: 10 additions & 13 deletions vllm/model_executor/layers/quantization/humming.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,9 @@
RowvLLMParameter,
)
from vllm.model_executor.utils import set_weight_attrs
from vllm.platforms import current_platform

if TYPE_CHECKING:
from vllm.model_executor.models.utils import WeightsMapper


try:
if current_platform.is_cuda():
from humming.dtypes import DataType
from humming.layer import HummingMethod
from humming.schema import (
Expand All @@ -65,16 +62,17 @@
HummingIndexedExperts,
get_humming_moe_gemm_type,
)
except ModuleNotFoundError:
HummingMethod = None


def assert_humming_available():
assert HummingMethod is not None, (
"humming is not available, please run "
"'pip install git+https://github.com/inclusionAI/humming' to install it."
if TYPE_CHECKING:
from humming.schema import (
BaseInputSchema,
BaseWeightSchema,
HummingInputSchema,
HummingWeightSchema,
)

from vllm.model_executor.models.utils import WeightsMapper


def prepare_padded_shape(shape, x):
padded_shape = math.ceil(shape / x) * x
Expand Down Expand Up @@ -186,7 +184,6 @@ class HummingConfig(QuantizationConfig):
packed_modules_mapping: dict[str, list[str]] = {}

def __init__(self, full_config: dict[str, Any] | None = None):
assert_humming_available()
self.full_config: dict[str, Any] = full_config or {}
Comment thread
jinzhen-lin marked this conversation as resolved.

@classmethod
Expand Down
Loading