Update tokenizer.py

HMUNACHI · Apr 13, 2024 · 30b1751
1 parent 6a951af
commit 30b1751
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/nanodl/__src/utils/tokenizer.py b/nanodl/__src/utils/tokenizer.py
@@ -55,8 +55,8 @@ class Tokenizer:
     ```
     """
     def __init__(self, 
-                 training_data: List[str], 
-                 vocab_size: int, 
+                 training_data: List[str] = None, 
+                 vocab_size: int = None, 
                  model_type: str = "bpe", 
                  max_sentence_length: int = 512,
                  model_path: Optional[str] = None):
@@ -104,4 +104,4 @@ def encode(self,
     def decode(self, 
                t: List[int]) -> str:
         """Converts a list of tokens back into a string."""
-        return self.sp_model.decode(t)
+        return self.sp_model.decode(t)