From 30b17516036f53be368f1d64720fcd8e4578f87c Mon Sep 17 00:00:00 2001 From: Henry Ndubuaku Date: Sat, 13 Apr 2024 18:06:03 +0100 Subject: [PATCH] Update tokenizer.py --- nanodl/__src/utils/tokenizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nanodl/__src/utils/tokenizer.py b/nanodl/__src/utils/tokenizer.py index f9c6807..65ecf7d 100644 --- a/nanodl/__src/utils/tokenizer.py +++ b/nanodl/__src/utils/tokenizer.py @@ -55,8 +55,8 @@ class Tokenizer: ``` """ def __init__(self, - training_data: List[str], - vocab_size: int, + training_data: List[str] = None, + vocab_size: int = None, model_type: str = "bpe", max_sentence_length: int = 512, model_path: Optional[str] = None): @@ -104,4 +104,4 @@ def encode(self, def decode(self, t: List[int]) -> str: """Converts a list of tokens back into a string.""" - return self.sp_model.decode(t) \ No newline at end of file + return self.sp_model.decode(t)