vllm-project · DarkLight1337 · Mar 5, 2026 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
@@ -92,24 +92,6 @@ class CacheConfig:
     benefits before turning this on.\n
     - "xxhash_cbor" combines canonical CBOR serialization with xxHash for
     reproducible hashing. Requires the optional ``xxhash`` package."""
-    cpu_offload_gb: float = Field(default=0, ge=0)
-    """The space in GiB to offload to CPU, per GPU. Default is 0, which means
-    no offloading. Intuitively, this argument can be seen as a virtual way to
-    increase the GPU memory size. For example, if you have one 24 GB GPU and
-    set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
-    load a 13B model with BF16 weight, which requires at least 26GB GPU memory.
-    Note that this requires fast CPU-GPU interconnect, as part of the model is
-    loaded from CPU memory to GPU memory on the fly in each model forward pass.
-
-    DEPRECATED: This field is deprecated and will be removed in v0.16.
-    Please use OffloadConfig.uva.cpu_offload_gb instead.
-    """
-    cpu_offload_params: set[str] = Field(default_factory=set)
-    """The set of parameter name segments to target for CPU offloading.
-
-    DEPRECATED: This field is deprecated and will be removed in v0.16.
-    Please use OffloadConfig.uva.cpu_offload_params instead.
-    """
     calculate_kv_scales: bool = False
     """This enables dynamic calculation of `k_scale` and `v_scale` when
     kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model

@@ -381,13 +381,6 @@ class CompilationConfig:
         certain small batchsizes, where inductor is good at optimizing.
     """
 
-    # Top-level Compilation control
-    level: int = Field(default=None)
-    """
-    Level is deprecated and will be removed in the next release,
-    either 0.12.0 or 0.11.2 whichever is soonest.
-    Please use mode. Currently all levels are mapped to mode.
-    """
     # Top-level Compilation control
     mode: CompilationMode = Field(default=None)
     """The compilation approach used for torch.compile-based compilation of the
@@ -801,17 +794,6 @@ def _skip_none_validation(cls, value: Any, handler: Callable) -> Any:
         return handler(value)
 
     def __post_init__(self) -> None:
-        if self.level is not None:
-            logger.warning(
-                "Level is deprecated and will be removed in the next release,"
-                "either 0.12.0 or 0.11.2 whichever is soonest."
-                "Use mode instead."
-                "If both level and mode are given,"
-                "only mode will be used."
-            )
-            if self.mode is None:
-                self.mode = self.level
-
         count_none = self.custom_ops.count("none")
         count_all = self.custom_ops.count("all")
         assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"

@@ -1074,21 +1074,6 @@ def _get_mm_prompt_updates(
             mm_items.get_all_counts(),
         )
 
-        for modality, prompt_updates in mm_prompt_updates.items():
-            for item_idx, item_prompt_updates in enumerate(prompt_updates):
-                if len(item_prompt_updates) > 1:
-                    logger.warning_once(
-                        "Detected %d prompt updates for `mm_items[%r][%s]`. "
-                        "Multiple prompt updates per item is now "
-                        "deprecated and may be removed in v0.13. "
-                        "Instead, please specify dynamic update targets "
-                        "in the same prompt update definition by passing "
-                        "a function to `PromptUpdate.target`.",
-                        len(prompt_updates),
-                        modality,
-                        item_idx,
-                    )
-
         return mm_prompt_updates
 
     def _find_mm_placeholders(