From c42d04078070cbf409120d058a509dbb83b8bd02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Olejniczak?= Date: Thu, 21 May 2026 23:32:27 +0300 Subject: [PATCH] Fix DynamicNTKScalingRotaryEmbedding and HPUCompressedTensorsConfig for upstream vllm@0a54df28 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: Upstream vLLM PRs #41277 and #43144 changed APIs in DynamicNTKScalingRotaryEmbedding (added max_trained_positions) and CompressedTensorsConfig (removed sparsity params). Upstream: vllm-project/vllm#41277, vllm-project/vllm#43144 Fix: Add max_trained_positions to rotary embedding test; remove stale sparsity_scheme_map and sparsity_ignore_list from HPUCompressedTensorsConfig init. Signed-off-by: Paweł Olejniczak --- tests/unit_tests/ops/test_hpu_rotary_embedding.py | 1 + vllm_gaudi/ops/hpu_compressed_tensors.py | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/unit_tests/ops/test_hpu_rotary_embedding.py b/tests/unit_tests/ops/test_hpu_rotary_embedding.py index 2ef23ab4f9..35c2bb5805 100644 --- a/tests/unit_tests/ops/test_hpu_rotary_embedding.py +++ b/tests/unit_tests/ops/test_hpu_rotary_embedding.py @@ -201,6 +201,7 @@ def test_dynamic_ntk_scaling_rotary_embedding( "head_size": head_size, "rotary_dim": rotary_dim, "max_position_embeddings": max_position_embeddings, + "max_trained_positions": max_position_embeddings, "base": base, "is_neox_style": is_neox_style, "scaling_factor": scaling_factor, diff --git a/vllm_gaudi/ops/hpu_compressed_tensors.py b/vllm_gaudi/ops/hpu_compressed_tensors.py index af7916020c..74d91e809b 100644 --- a/vllm_gaudi/ops/hpu_compressed_tensors.py +++ b/vllm_gaudi/ops/hpu_compressed_tensors.py @@ -20,7 +20,6 @@ CompressedTensorsConfig, CompressedTensorsMoEMethod, CompressedTensorsKVCacheMethod, - SparsityCompressionConfig, ) from vllm.model_executor.layers.quantization.compressed_tensors import (compressed_tensors_moe) from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import ( @@ -1056,8 +1055,6 @@ def __init__( target_scheme_map: dict[str, Any], ignore: list[str], quant_format: str, - sparsity_scheme_map: dict[str, SparsityCompressionConfig], - sparsity_ignore_list: list[str], kv_cache_scheme: dict[str, Any] | None = None, config: dict[str, Any] | None = None, transform_config: dict[str, Any] | None = None, @@ -1068,8 +1065,6 @@ def __init__( target_scheme_map, ignore, quant_format, - sparsity_scheme_map, - sparsity_ignore_list, kv_cache_scheme, config, transform_config,