diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py
index fe8cc5906..9a65deaec 100644
--- a/exo/api/chatgpt_api.py
+++ b/exo/api/chatgpt_api.py
@@ -71,16 +71,9 @@ def generate_completion(
   }
 
   choice = completion["choices"][0]
-  print(f"\nchoice {choice}")
   if object_type.startswith("chat.completion"):
     key_name = "delta" if stream else "message"
-
-    token_decode = tokenizer.batch_decode(
-      tokens,
-      skip_special_tokens=True,
-      clean_up_tokenization_spaces=False
-    )
-    choice[key_name] = {"role": "assistant", "content": token_decode}
+    choice[key_name] = {"role": "assistant", "content": tokenizer.decode(tokens)}
   elif object_type == "text_completion":
     choice["text"] = tokenizer.decode(tokens)
   else:
diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py
index 11f8eddb3..676e31620 100644
--- a/exo/inference/pytorch/inference.py
+++ b/exo/inference/pytorch/inference.py
@@ -14,6 +14,7 @@
 from exo.download.hf.hf_shard_download import HFShardDownloader
 
 from transformers import AutoTokenizer
+
 # llama
 from transformers.models.llama.modeling_llama import LlamaModel