@@ -151,11 +151,23 @@ def get_tokenizer(tokenizer_model_path, model_name, is_chat=False):
151151 - TokenizerInterface: An instance of a tokenizer.
152152 """
153153 if "llama-3" in str (model_name ).lower ():
154- return Llama3ChatFormat (tokenizer_model_path ) if is_chat else TiktokenWrapper (tokenizer_model_path )
154+ return (
155+ Llama3ChatFormat (tokenizer_model_path )
156+ if is_chat
157+ else TiktokenWrapper (tokenizer_model_path )
158+ )
155159 elif "llama-2" in str (model_name ).lower ():
156- return Llama2ChatFormat (tokenizer_model_path ) if is_chat else SentencePieceWrapper (tokenizer_model_path )
160+ return (
161+ Llama2ChatFormat (tokenizer_model_path )
162+ if is_chat
163+ else SentencePieceWrapper (tokenizer_model_path )
164+ )
157165 else :
158- return TokenizersChatFormat (tokenizer_model_path ) if is_chat else TokenizersWrapper (tokenizer_model_path )
166+ return (
167+ TokenizersChatFormat (tokenizer_model_path )
168+ if is_chat
169+ else TokenizersWrapper (tokenizer_model_path )
170+ )
159171
160172
161173Role = Literal ["system" , "user" , "assistant" ]
@@ -219,7 +231,7 @@ def encode_prompt(self, prompt: str):
219231 return self .encode_dialog_prompt (messages )
220232
221233 def encode_dialog_prompt (self , dialog : List [Message ]) -> List [int ]:
222- text = self .tokenizer .apply_chat_template (dialog ,
223- tokenize = False ,
224- add_generation_prompt = True )
225- return self .encode (text )
234+ text = self .tokenizer .apply_chat_template (
235+ dialog , tokenize = False , add_generation_prompt = True
236+ )
237+ return self .encode (text )