Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions vllm/config/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,24 +92,6 @@ class CacheConfig:
benefits before turning this on.\n
- "xxhash_cbor" combines canonical CBOR serialization with xxHash for
reproducible hashing. Requires the optional ``xxhash`` package."""
cpu_offload_gb: float = Field(default=0, ge=0)
"""The space in GiB to offload to CPU, per GPU. Default is 0, which means
no offloading. Intuitively, this argument can be seen as a virtual way to
increase the GPU memory size. For example, if you have one 24 GB GPU and
set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
load a 13B model with BF16 weights, which requires at least 26 GB of GPU
memory. Note that this requires a fast CPU-GPU interconnect, as part of the model is
loaded from CPU memory to GPU memory on the fly in each model forward pass.

DEPRECATED: This field is deprecated and will be removed in v0.16.
Please use OffloadConfig.uva.cpu_offload_gb instead.
"""
cpu_offload_params: set[str] = Field(default_factory=set)
"""The set of parameter name segments to target for CPU offloading.

DEPRECATED: This field is deprecated and will be removed in v0.16.
Please use OffloadConfig.uva.cpu_offload_params instead.
"""
calculate_kv_scales: bool = False
"""This enables dynamic calculation of `k_scale` and `v_scale` when
kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
Expand Down
18 changes: 0 additions & 18 deletions vllm/config/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,13 +381,6 @@ class CompilationConfig:
certain small batchsizes, where inductor is good at optimizing.
"""

# Top-level Compilation control
level: int = Field(default=None)
"""
`level` is deprecated and will be removed in the next release
(0.12.0 or 0.11.2, whichever comes first).
Please use `mode` instead. Currently all levels are mapped to a mode.
"""
# Top-level Compilation control
mode: CompilationMode = Field(default=None)
"""The compilation approach used for torch.compile-based compilation of the
Expand Down Expand Up @@ -801,17 +794,6 @@ def _skip_none_validation(cls, value: Any, handler: Callable) -> Any:
return handler(value)

def __post_init__(self) -> None:
if self.level is not None:
logger.warning(
"Level is deprecated and will be removed in the next release,"
"either 0.12.0 or 0.11.2 whichever is soonest."
"Use mode instead."
"If both level and mode are given,"
"only mode will be used."
)
if self.mode is None:
self.mode = self.level

count_none = self.custom_ops.count("none")
count_all = self.custom_ops.count("all")
assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"
Expand Down
15 changes: 0 additions & 15 deletions vllm/multimodal/processing/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1074,21 +1074,6 @@ def _get_mm_prompt_updates(
mm_items.get_all_counts(),
)

for modality, prompt_updates in mm_prompt_updates.items():
for item_idx, item_prompt_updates in enumerate(prompt_updates):
if len(item_prompt_updates) > 1:
logger.warning_once(
"Detected %d prompt updates for `mm_items[%r][%s]`. "
"Multiple prompt updates per item is now "
"deprecated and may be removed in v0.13. "
"Instead, please specify dynamic update targets "
"in the same prompt update definition by passing "
"a function to `PromptUpdate.target`.",
len(prompt_updates),
modality,
item_idx,
)

return mm_prompt_updates

def _find_mm_placeholders(
Expand Down