2 changes: 1 addition & 1 deletion nemo/export/vllm/engine.py
@@ -18,7 +18,7 @@
 from sentencepiece import SentencePieceProcessor
 from transformers import PreTrainedTokenizerBase
 from vllm import LLMEngine
-from vllm.transformers_utils.tokenizer_group.tokenizer_group import TokenizerGroup
+from vllm.transformers_utils.tokenizer_group import TokenizerGroup
 
 from nemo.export.sentencepiece_tokenizer import SentencePieceTokenizer
 from nemo.export.tarutils import TarPath
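
Note: this import change tracks a refactor in newer vLLM releases (somewhere between 0.8.2 and 0.8.5, per the pin bump below), where TokenizerGroup is exported directly from vllm.transformers_utils.tokenizer_group rather than from a nested submodule. A hedged compatibility sketch, not part of this PR, for code that must tolerate both layouts:

# Compatibility sketch (assumption: only the module path changed, not the class).
try:
    # vLLM 0.8.5-era layout, as used in this PR.
    from vllm.transformers_utils.tokenizer_group import TokenizerGroup
except ImportError:
    # Older layout (vLLM 0.8.2 and earlier), one module deeper.
    from vllm.transformers_utils.tokenizer_group.tokenizer_group import TokenizerGroup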
2 changes: 2 additions & 0 deletions nemo/export/vllm/model_config.py
@@ -164,6 +164,7 @@ def __init__(
         )
         self.is_attention_free = self._init_attention_free()
         self.has_inner_state = self._init_has_inner_state()
+        self.has_noops = self._init_has_noops()
 
         self._verify_tokenizer_mode()
         self._verify_quantization()
@@ -209,6 +210,7 @@ def _load_hf_arguments(self, nemo_config: Dict[str, Any]) -> Dict[str, Any]:
             'num_key_value_heads': 'num_query_groups',
             # 'hidden_act': 'activation', ## <- vLLM has good defaults for the models, nemo values are wrong
             'max_position_embeddings': ['max_position_embeddings', 'encoder_seq_length'],
+            'tie_word_embeddings': 'share_embeddings_and_output_weights',
             'rms_norm_eps': 'layernorm_epsilon',
             'attention_dropout': 'attention_dropout',
             'initializer_range': 'init_method_std',
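
Note: the new 'tie_word_embeddings' entry forwards NeMo's share_embeddings_and_output_weights flag to the Hugging Face config key vLLM consults for weight tying; it pairs with the lm_head change in model_converters.py below. A hypothetical sketch of how a map like this is typically applied (the full _load_hf_arguments body is not shown here; list values are assumed to mean "first NeMo key present wins"):

from typing import Any, Dict, List, Union

# Assumed shape of the HF -> NeMo argument map excerpted above.
HF_TO_NEMO: Dict[str, Union[str, List[str]]] = {
    'num_key_value_heads': 'num_query_groups',
    'max_position_embeddings': ['max_position_embeddings', 'encoder_seq_length'],
    'tie_word_embeddings': 'share_embeddings_and_output_weights',
    'rms_norm_eps': 'layernorm_epsilon',
}

def load_hf_arguments(nemo_config: Dict[str, Any]) -> Dict[str, Any]:
    """Translate NeMo config keys to their HF equivalents (sketch)."""
    hf_args: Dict[str, Any] = {}
    for hf_key, nemo_keys in HF_TO_NEMO.items():
        candidates = [nemo_keys] if isinstance(nemo_keys, str) else nemo_keys
        for nemo_key in candidates:
            if nemo_key in nemo_config:
                hf_args[hf_key] = nemo_config[nemo_key]
                break
    return hf_args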
3 changes: 2 additions & 1 deletion nemo/export/vllm/model_converters.py
@@ -78,7 +78,8 @@ def convert_weights(self, nemo_model_config, state_dict):

         yield ('model.embed_tokens.weight', state_dict['model.embedding.word_embeddings.weight'])
         yield ('model.norm.weight', state_dict['model.decoder.final_layernorm.weight'])
-        yield ('lm_head.weight', state_dict['model.output_layer.weight'])
+        if not nemo_model_config.get("share_embeddings_and_output_weights", False):
+            yield ('lm_head.weight', state_dict['model.output_layer.weight'])
 
         for layer in range(int(num_layers)):
             qkv_weights = state_dict['model.decoder.layers.self_attention.linear_qkv.weight'][layer]
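
Note: the guard skips emitting lm_head.weight when the NeMo checkpoint ties its output projection to the input embeddings; with tie_word_embeddings propagated by the model_config.py change above, vLLM reuses the embedding matrix instead. An illustrative PyTorch sketch of what tying means (toy dimensions):

import torch.nn as nn

# Toy sizes for illustration only.
vocab_size, hidden_size = 32000, 4096
embed_tokens = nn.Embedding(vocab_size, hidden_size)
lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

# Tying: the head shares the embedding tensor, so the checkpoint stores one
# copy and a separate 'lm_head.weight' entry would be redundant.
lm_head.weight = embed_tokens.weight
assert lm_head.weight.data_ptr() == embed_tokens.weight.data_ptr()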
4 changes: 2 additions & 2 deletions nemo/export/vllm/tokenizer_group.py
@@ -16,12 +16,12 @@

 from vllm.config import TokenizerPoolConfig
 from vllm.lora.request import LoRARequest
-from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import BaseTokenizerGroup
+from vllm.transformers_utils.tokenizer_group import TokenizerGroup
 
 from nemo.export.sentencepiece_tokenizer import SentencePieceTokenizer
 
 
-class NemoTokenizerGroup(BaseTokenizerGroup):
+class NemoTokenizerGroup(TokenizerGroup):
     """
     Implements a custom tokenizer for vLLM, based on SentencePieceTokenizer.
     """
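
Note: with the abstract BaseTokenizerGroup gone from vLLM, NemoTokenizerGroup now subclasses the concrete TokenizerGroup and overrides only what it needs. A minimal hedged sketch of the pattern (arguments deliberately passed through, since the exact 0.8.5 method signatures are not reproduced here):

from vllm.transformers_utils.tokenizer_group import TokenizerGroup

class SketchTokenizerGroup(TokenizerGroup):
    """Hypothetical subclass: intercept encoding, defer everything else."""

    def encode(self, *args, **kwargs):
        # Custom pre/post-processing around the parent implementation goes here.
        return super().encode(*args, **kwargs)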
2 changes: 1 addition & 1 deletion requirements/requirements_vllm.txt
@@ -19,7 +19,7 @@ pangu
 rouge_score
 sacrebleu
 scikit-learn
-vllm==0.8.2
+vllm==0.8.5
 webdataset>=0.2.86
 wget
 zarr>=2.18.2,<3.0.0
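
Note: the pin moves from 0.8.2 to 0.8.5 to match the import-path and tokenizer-group changes above. A quick sanity-check sketch for an environment that should already satisfy the pin:

import vllm

# Fail fast if an older wheel (e.g. 0.8.2) is still installed.
assert vllm.__version__ == "0.8.5", f"expected vllm 0.8.5, found {vllm.__version__}"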