From c42d04078070cbf409120d058a509dbb83b8bd02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Olejniczak?= <pawelx.olejniczak@intel.com>
Date: Thu, 21 May 2026 23:32:27 +0300
Subject: [PATCH] Fix DynamicNTKScalingRotaryEmbedding and
 HPUCompressedTensorsConfig for upstream vllm@0a54df28
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause: Upstream vLLM PRs #41277 and #43144 changed APIs in DynamicNTKScalingRotaryEmbedding (added max_trained_positions) and CompressedTensorsConfig (removed sparsity params).
Upstream: vllm-project/vllm#41277, vllm-project/vllm#43144
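Fix: Pass max_trained_positions (set to max_position_embeddings) in the DynamicNTKScalingRotaryEmbedding test arguments; remove the stale sparsity_scheme_map and sparsity_ignore_list parameters from the HPUCompressedTensorsConfig __init__ signature and from the arguments it forwards, and drop the SparsityCompressionConfig import.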

Signed-off-by: Paweł Olejniczak <pawelx.olejniczak@intel.com>
---
 tests/unit_tests/ops/test_hpu_rotary_embedding.py | 1 +
 vllm_gaudi/ops/hpu_compressed_tensors.py          | 5 -----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/unit_tests/ops/test_hpu_rotary_embedding.py b/tests/unit_tests/ops/test_hpu_rotary_embedding.py
index 2ef23ab4f9..35c2bb5805 100644
--- a/tests/unit_tests/ops/test_hpu_rotary_embedding.py
+++ b/tests/unit_tests/ops/test_hpu_rotary_embedding.py
@@ -201,6 +201,7 @@ def test_dynamic_ntk_scaling_rotary_embedding(
         "head_size": head_size,
         "rotary_dim": rotary_dim,
         "max_position_embeddings": max_position_embeddings,
+        "max_trained_positions": max_position_embeddings,
         "base": base,
         "is_neox_style": is_neox_style,
         "scaling_factor": scaling_factor,
diff --git a/vllm_gaudi/ops/hpu_compressed_tensors.py b/vllm_gaudi/ops/hpu_compressed_tensors.py
index af7916020c..74d91e809b 100644
--- a/vllm_gaudi/ops/hpu_compressed_tensors.py
+++ b/vllm_gaudi/ops/hpu_compressed_tensors.py
@@ -20,7 +20,6 @@
     CompressedTensorsConfig,
     CompressedTensorsMoEMethod,
     CompressedTensorsKVCacheMethod,
-    SparsityCompressionConfig,
 )
 from vllm.model_executor.layers.quantization.compressed_tensors import (compressed_tensors_moe)
 from vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe import (
@@ -1056,8 +1055,6 @@ def __init__(
         target_scheme_map: dict[str, Any],
         ignore: list[str],
         quant_format: str,
-        sparsity_scheme_map: dict[str, SparsityCompressionConfig],
-        sparsity_ignore_list: list[str],
         kv_cache_scheme: dict[str, Any] | None = None,
         config: dict[str, Any] | None = None,
         transform_config: dict[str, Any] | None = None,
@@ -1068,8 +1065,6 @@ def __init__(
             target_scheme_map,
             ignore,
             quant_format,
-            sparsity_scheme_map,
-            sparsity_ignore_list,
             kv_cache_scheme,
             config,
             transform_config,