@@ -190,21 +190,22 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path], fn
         else:
             tokenizer_config = {}
         for key, value in tokenizer_config.items():
-            assert isinstance(value, dict) or isinstance(value, str)
-            if key not in TOKEN_NAME_TO_ID or TOKEN_NAME_TO_ID[key] == -1:
+            if not isinstance(value, dict) or not isinstance(value, str):
                 continue
-            self.special_tokens_map[TOKEN_NAME_TO_ID[key]] = value["content"] if isinstance(value, dict) else value
+            token_id = TOKEN_NAME_TO_ID.get(key, -1)
+            if token_id == -1:
+                continue
+            self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
 
         special_tokens: Dict[str, Any]
         if fname_special_tokens is not None:
             special_tokens = json.load(open(fname_special_tokens))
         else:
             special_tokens = {}
         for key, value in special_tokens.items():
-            assert isinstance(value, dict) or isinstance(value, str)
-            if key not in TOKEN_NAME_TO_ID:
+            if not isinstance(value, dict) or not isinstance(value, str):
                 continue
-            token_id = TOKEN_NAME_TO_ID[key]
+            token_id = TOKEN_NAME_TO_ID.get(key, -1)
             if token_id == -1 or token_id in self.special_tokens_map:
                 continue
             self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
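For context, the pattern the new lines implement can be sketched as a small standalone function: each named special token is looked up with TOKEN_NAME_TO_ID.get(key, -1), unknown names and already-assigned ids are skipped, and the token text is taken either from a dict's "content" field or from a plain string. The token table and sample inputs below are invented for illustration, and the two loops are condensed into one helper. Note also that rejecting values that are neither a dict nor a str requires isinstance(value, (dict, str)), the negation of the removed assert, so the sketch uses that form rather than the two not-isinstance checks joined with or.

# Minimal sketch, not code from the repository: the token table and inputs are made up.
from typing import Any, Dict

TOKEN_NAME_TO_ID: Dict[str, int] = {
    "unk_token": 0,
    "bos_token": 1,
    "eos_token": 2,
    "pad_token": -1,   # -1 marks a token name the model has no id for
}

def collect_special_tokens(tokenizer_config: Dict[str, Any],
                           special_tokens: Dict[str, Any]) -> Dict[int, str]:
    special_tokens_map: Dict[int, str] = {}
    for source in (tokenizer_config, special_tokens):
        for key, value in source.items():
            # Accept only dict entries (added-token objects) or plain strings.
            if not isinstance(value, (dict, str)):
                continue
            token_id = TOKEN_NAME_TO_ID.get(key, -1)
            # Skip unknown names, names mapped to -1, and ids that are already set.
            if token_id == -1 or token_id in special_tokens_map:
                continue
            special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
    return special_tokens_map

print(collect_special_tokens(
    {"bos_token": {"content": "<s>"}, "model_max_length": 2048},
    {"bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>"},
))
# {1: '<s>', 2: '</s>'}: the tokenizer_config entry wins for id 1, pad_token is dropped.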