5 changes: 3 additions & 2 deletions transformers/tokenization_utils.py
@@ -21,6 +21,7 @@
 import json
 import six
 import copy
+import itertools
 from io import open

 from .file_utils import cached_path, is_tf_available, is_torch_available
@@ -641,9 +642,9 @@ def split_on_tokens(tok_list, text):
             tokenized_text += [sub_text]
         text_list = tokenized_text

-    return sum((self._tokenize(token, **kwargs) if token not \
+    return list(itertools.chain.from_iterable((self._tokenize(token, **kwargs) if token not \
         in self.added_tokens_encoder and token not in self.all_special_tokens \
-        else [token] for token in tokenized_text), [])
+        else [token] for token in tokenized_text)))

 added_tokens = list(self.added_tokens_encoder.keys()) + self.all_special_tokens
 tokenized_text = split_on_tokens(added_tokens, text)
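For context, the patch replaces the sum(..., []) flattening idiom with itertools.chain.from_iterable. sum() concatenates pairwise and copies the growing accumulator at every step, so flattening n sublists costs O(n^2); chain.from_iterable walks each element exactly once. A minimal sketch of the equivalence (the sample sublists are hypothetical, not taken from the PR):

import itertools

# Hypothetical per-token sublists, shaped like split_on_tokens output:
# an added/special token stays whole, everything else is sub-tokenized.
tokenized_text = [["hello"], ["wor", "##ld"], ["[SEP]"]]

# Old idiom: quadratic, since each + builds a fresh accumulator list.
flat_old = sum(tokenized_text, [])

# New idiom: linear, yields the items lazily in a single pass.
flat_new = list(itertools.chain.from_iterable(tokenized_text))

assert flat_old == flat_new == ["hello", "wor", "##ld", "[SEP]"]

Both produce the same flat token list; only the asymptotic cost changes, which matters when tokenized_text holds one sublist per word of a long input text.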