From e2d8a249588b32bd1ad40f6e1bbc7d9270fc929f Mon Sep 17 00:00:00 2001 From: Quentin Anthony Date: Sat, 18 Mar 2023 14:30:31 -0400 Subject: [PATCH] Move conditional tiktoken import to the __init__ func (#842) * Move conditional tiktoken import to the __init__ func * Update NeoXArgs docs automatically --------- Co-authored-by: github-actions --- configs/neox_arguments.md | 2 +- megatron/tokenizer/tokenizer.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/configs/neox_arguments.md b/configs/neox_arguments.md index 6bb2ded07..416ad5f83 100644 --- a/configs/neox_arguments.md +++ b/configs/neox_arguments.md @@ -111,7 +111,7 @@ Logging Arguments - **git_hash**: str - Default = 7d682df + Default = 6dd0344 current git hash of repository diff --git a/megatron/tokenizer/tokenizer.py b/megatron/tokenizer/tokenizer.py index a9b74fc23..7c81d0ff4 100644 --- a/megatron/tokenizer/tokenizer.py +++ b/megatron/tokenizer/tokenizer.py @@ -353,13 +353,13 @@ def eod(self): class TiktokenTokenizer(AbstractTokenizer): """Tokenizer from OpenAI's tiktoken implementation""" - try: - import tiktoken - except ModuleNotFoundError: - print("Please install tiktoken: (https://github.com/openai/tiktoken)") - raise Exception - def __init__(self, vocab_file): + try: + import tiktoken + except ModuleNotFoundError: + print("Please install tiktoken: (https://github.com/openai/tiktoken)") + raise Exception + name = "TiktokenTokenizer" super().__init__(name)