Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions vllm/v1/worker/cpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

class CPUModelRunner(GPUModelRunner):
def __init__(self, vllm_config: VllmConfig, device: torch.device):
# avoid calling accelerator APIs for methods inherited from super class
_set_torch_accelerator_to_noop()

with _torch_cuda_wrapper():
super().__init__(vllm_config, device)

Expand Down Expand Up @@ -244,3 +247,11 @@ def _set_global_compilation_settings(config: VllmConfig):
yield
finally:
torch_inductor_config.freezing = freezing_value


def _set_torch_accelerator_to_noop() -> None:
def noop(*args: Any, **kwargs: Any) -> None:
pass

torch.accelerator.synchronize = noop
torch.accelerator.empty_cache = noop
Loading