Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 1 addition & 12 deletions vllm/v1/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
MRotaryEmbedding,
XDRotaryEmbedding,
)
from vllm.model_executor.model_loader import TensorizerLoader, get_model_loader
from vllm.model_executor.model_loader import get_model_loader
from vllm.model_executor.model_loader.reload import (
finalize_layerwise_reload,
initialize_layerwise_reload,
Expand Down Expand Up @@ -194,7 +194,6 @@
)

if TYPE_CHECKING:
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
from vllm.v1.spec_decode.ngram_proposer import NgramProposer

Expand Down Expand Up @@ -4504,16 +4503,6 @@ def reload_weights(
weights_not_loaded,
)

def save_tensorized_model(
self,
tensorizer_config: "TensorizerConfig",
) -> None:
TensorizerLoader.save_model(
self.get_model(),
tensorizer_config=tensorizer_config,
model_config=self.model_config,
)

def _get_prompt_logprobs_dict(
self,
hidden_states: torch.Tensor,
Expand Down
10 changes: 5 additions & 5 deletions vllm/v1/worker/gpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
from vllm.v1.worker.worker_base import WorkerBase
from vllm.v1.worker.workspace import init_workspace_manager

from ...model_executor.model_loader import TensorizerLoader
from .gpu.warmup import warmup_kernels
from .utils import request_memory

Expand Down Expand Up @@ -836,12 +837,11 @@ def save_sharded_state(
max_size=max_size,
)

def save_tensorized_model(
self,
tensorizer_config: "TensorizerConfig",
) -> None:
self.model_runner.save_tensorized_model(
def save_tensorized_model(self, tensorizer_config: "TensorizerConfig") -> None:
TensorizerLoader.save_model(
self.get_model(),
tensorizer_config=tensorizer_config,
model_config=self.model_config,
)

def init_weight_transfer_engine(self, init_info: dict) -> None:
Expand Down