bugfix: llava-hf/llava-interleave-qwen-7b-hf (#2497)
- fix init raising an exception caused by the tie_word_embeddings config
- add a vision_max_batch_size option for starting the server (see the usage sketch below)
deepindeed2022 committed Oct 25, 2024
1 parent 89f52bc commit 6ef3a7e
Showing 2 changed files with 30 additions and 9 deletions.
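
Usage sketch for the new option: the serve() entry point changed below reads a vision_max_batch_size keyword and wraps it in a VisionConfig before building the pipeline, so starting the OpenAI-compatible server from Python could look like the following. The model path and batch size are illustrative; only the vision_max_batch_size kwarg comes from this commit.

from lmdeploy.serve.openai.api_server import serve

# vision_max_batch_size is read from **kwargs and forwarded as VisionConfig(4),
# as shown in the api_server.py diff below; serve() then blocks and exposes the
# OpenAI-compatible HTTP endpoints.
serve('llava-hf/llava-interleave-qwen-7b-hf',  # illustrative model path
      backend='turbomind',
      vision_max_batch_size=4)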
4 changes: 3 additions & 1 deletion lmdeploy/serve/openai/api_server.py
@@ -15,7 +15,7 @@
 
 from lmdeploy.archs import get_task
 from lmdeploy.messages import (GenerationConfig, LogitsProcessor,
-                               PytorchEngineConfig, TurbomindEngineConfig)
+                               PytorchEngineConfig, TurbomindEngineConfig, VisionConfig)
 from lmdeploy.model import ChatTemplateConfig
 from lmdeploy.serve.async_engine import AsyncEngine
 from lmdeploy.serve.openai.protocol import (  # noqa: E501
@@ -1054,13 +1054,15 @@ def serve(model_path: str,
 
     _, pipeline_class = get_task(model_path)
 
+    vision_config = VisionConfig(kwargs.get("vision_max_batch_size", 1))
     VariableInterface.async_engine = pipeline_class(
         model_path=model_path,
         model_name=model_name,
         backend=backend,
         backend_config=backend_config,
         chat_template_config=chat_template_config,
         max_log_len=max_log_len,
+        vision_config=vision_config,
         **kwargs)
 
     if proxy_url is not None:
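For reference, the same vision batch-size knob is exposed by the offline pipeline API. A minimal sketch, assuming lmdeploy's top-level pipeline() and VisionConfig exports (not part of this diff):

from lmdeploy import pipeline, VisionConfig

# VisionConfig(max_batch_size=...) bounds how many images the vision encoder
# processes per forward pass; the server change above builds the same object
# from the vision_max_batch_size kwarg.
pipe = pipeline('llava-hf/llava-interleave-qwen-7b-hf',  # illustrative model path
                vision_config=VisionConfig(max_batch_size=4))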
35 changes: 27 additions & 8 deletions lmdeploy/vl/model/llava_hf.py
@@ -24,21 +24,40 @@ def build_model(self):
             warnings.simplefilter('ignore')
             from transformers import LlavaForConditionalGeneration
             model = LlavaForConditionalGeneration._from_config(self.hf_config)
+
+        if not getattr(model.config, "tie_word_embeddings", False):
             if not self.with_llm:
                 del model.language_model
                 for key in ['language_model']:
                     setattr(model, key, None)
             else:
                 self.vl_model = model
+            with disable_logging():
+                load_checkpoint_and_dispatch(
+                    model=model,
+                    max_memory=self.max_memory,
+                    checkpoint=self.model_path,
+                    device_map='auto' if not self.with_llm else {'': 'cpu'},
+                    no_split_module_classes=['CLIPEncoderLayer', 'SiglipEncoderLayer'],
+                    dtype=torch.half)
+        else:
+            # fix for llava-hf/llava-interleave-qwen-7b-hf: the LLM can only be
+            # removed after loading, since tying calls llm.get_output_embeddings()
+            with disable_logging():
+                load_checkpoint_and_dispatch(
+                    model=model,
+                    max_memory=self.max_memory,
+                    checkpoint=self.model_path,
+                    device_map='auto' if not self.with_llm else {'': 'cpu'},
+                    no_split_module_classes=['CLIPEncoderLayer', 'SiglipEncoderLayer'],
+                    dtype=torch.half)
+            if not self.with_llm:
+                del model.language_model
+                for key in ['language_model']:
+                    setattr(model, key, None)
+            else:
+                self.vl_model = model
 
-        with disable_logging():
-            load_checkpoint_and_dispatch(
-                model=model,
-                max_memory=self.max_memory,
-                checkpoint=self.model_path,
-                device_map='auto' if not self.with_llm else {'': 'cpu'},
-                no_split_module_classes=['CLIPEncoderLayer'],
-                dtype=torch.half)
         model.eval()
         self.model = model
         # processor
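Background for the branch added above: with tie_word_embeddings enabled, the weight-tying step needs llm.get_output_embeddings(), so the language model can only be dropped after load_checkpoint_and_dispatch has run; removing it first is what raised the exception on llava-hf/llava-interleave-qwen-7b-hf. A quick way to inspect the flag for a checkpoint, sketched with the standard transformers config API (the getattr fallback mirrors the one in the diff):

from transformers import AutoConfig

# The fix above targets checkpoints where this prints True.
config = AutoConfig.from_pretrained('llava-hf/llava-interleave-qwen-7b-hf')
print(getattr(config, 'tie_word_embeddings', False))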
