diff --git a/fast_tokenizer/perf/perf.py b/fast_tokenizer/perf/perf.py
index bf129067138c..0b40060b2c71 100755
--- a/fast_tokenizer/perf/perf.py
+++ b/fast_tokenizer/perf/perf.py
@@ -17,14 +17,10 @@
 import tensorflow as tf
 import tensorflow_text as tf_text
+from transformers import AutoTokenizer
-import paddle
-import paddlenlp
+from paddlenlp.experimental import FastTokenizer, to_tensor
 from paddlenlp.transformers import BertTokenizer
-from paddlenlp.experimental import FastTokenizer
-from paddlenlp.experimental import to_tensor
-
-from transformers import AutoTokenizer
 
 parser = argparse.ArgumentParser()
diff --git a/fast_tokenizer/python/fast_tokenizer/decoders/__init__.py b/fast_tokenizer/python/fast_tokenizer/decoders/__init__.py
index 0d8c499c3d38..142e5af3345a 100644
--- a/fast_tokenizer/python/fast_tokenizer/decoders/__init__.py
+++ b/fast_tokenizer/python/fast_tokenizer/decoders/__init__.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Tuple, Union, Tuple, List, Dict
-
 from abc import ABC
+from typing import List
+
 from .. import C
diff --git a/fast_tokenizer/python/fast_tokenizer/normalizers/__init__.py b/fast_tokenizer/python/fast_tokenizer/normalizers/__init__.py
index 3383eb57f564..6e8fa6e45b2d 100644
--- a/fast_tokenizer/python/fast_tokenizer/normalizers/__init__.py
+++ b/fast_tokenizer/python/fast_tokenizer/normalizers/__init__.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Tuple, Union, Tuple, List, Dict
 from abc import ABC
 from .. import C
diff --git a/fast_tokenizer/python/fast_tokenizer/postprocessors/__init__.py b/fast_tokenizer/python/fast_tokenizer/postprocessors/__init__.py
index 3c12c20082f1..496c6413b76b 100644
--- a/fast_tokenizer/python/fast_tokenizer/postprocessors/__init__.py
+++ b/fast_tokenizer/python/fast_tokenizer/postprocessors/__init__.py
@@ -12,12 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Tuple, Union, Tuple, List, Dict
-
 from abc import ABC
+from typing import List, Tuple, Union
-from .. import C
-from .. import Encoding
+from .. import C, Encoding
 
 class PostProcessor(ABC):
diff --git a/fast_tokenizer/python/fast_tokenizer/tokenizers_impl/ernie.py b/fast_tokenizer/python/fast_tokenizer/tokenizers_impl/ernie.py
index a656978d1676..6c854faa1713 100644
--- a/fast_tokenizer/python/fast_tokenizer/tokenizers_impl/ernie.py
+++ b/fast_tokenizer/python/fast_tokenizer/tokenizers_impl/ernie.py
@@ -12,14 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .base_tokenizer import BaseFastTokenizer
-
+from fast_tokenizer import Tokenizer, decoders
+from fast_tokenizer.models import FastWordPiece, WordPiece
 from fast_tokenizer.normalizers import BertNormalizer
-from fast_tokenizer.pretokenizers import BertPreTokenizer
-from fast_tokenizer.models import WordPiece, FastWordPiece
 from fast_tokenizer.postprocessors import BertPostProcessor
-from fast_tokenizer import decoders
-from fast_tokenizer import Tokenizer
+from fast_tokenizer.pretokenizers import BertPreTokenizer
+
+from .base_tokenizer import BaseFastTokenizer
 
 __all__ = ["ErnieFastTokenizer"]
@@ -89,7 +88,7 @@ def __init__(
         tokenizer.postprocessor = BertPostProcessor((str(sep_token), sep_token_id), (str(cls_token), cls_token_id))
         tokenizer.decoder = decoders.WordPiece(prefix=wordpieces_prefix)
 
-        if max_sequence_len == None:
+        if max_sequence_len is None:
             tokenizer.disable_truncation()
         else:
             tokenizer.enable_truncation(max_sequence_len)
diff --git a/fast_tokenizer/python/tests/test_byte_level_pretokenizer.py b/fast_tokenizer/python/tests/test_byte_level_pretokenizer.py
index cfde94077b77..9f42234acf9b 100644
--- a/fast_tokenizer/python/tests/test_byte_level_pretokenizer.py
+++ b/fast_tokenizer/python/tests/test_byte_level_pretokenizer.py
@@ -12,9 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
-import os
 import unittest
+
 from fast_tokenizer import pretokenizers
diff --git a/fast_tokenizer/python/tests/test_clip_tokenizer.py b/fast_tokenizer/python/tests/test_clip_tokenizer.py
index a0ac269c0bee..a6124b4f8c44 100644
--- a/fast_tokenizer/python/tests/test_clip_tokenizer.py
+++ b/fast_tokenizer/python/tests/test_clip_tokenizer.py
@@ -12,11 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
 import os
 import unittest
-import fast_tokenizer
 from fast_tokenizer import ClipFastTokenizer, models
 from paddlenlp.utils.downloader import get_path_from_url
diff --git a/fast_tokenizer/setup.py b/fast_tokenizer/setup.py
index 31ae12b643a9..d172cda979ca 100644
--- a/fast_tokenizer/setup.py
+++ b/fast_tokenizer/setup.py
@@ -13,13 +13,8 @@
 # limitations under the License.
 
 import os
-import re
-import subprocess
-import sys
-import multiprocessing
-import setuptools
-from setuptools import setup, Distribution, Extension
+from setuptools import Distribution, setup
 from setuptools.command.install import install