From b518f73fcf4e803431b619a50012226aa7b92e78 Mon Sep 17 00:00:00 2001
From: risingsunomi <vincentcastro@gmail.com>
Date: Mon, 7 Oct 2024 12:06:29 -0800
Subject: [PATCH] Fix comma and other text issues in decoded chat completion content

---
 exo/api/chatgpt_api.py             | 9 +--------
 exo/inference/pytorch/inference.py | 1 +
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/exo/api/chatgpt_api.py b/exo/api/chatgpt_api.py
index fe8cc5906..9a65deaec 100644
--- a/exo/api/chatgpt_api.py
+++ b/exo/api/chatgpt_api.py
@@ -71,16 +71,9 @@ def generate_completion(
     }
 
   choice = completion["choices"][0]
-  print(f"\nchoice {choice}")
   if object_type.startswith("chat.completion"):
     key_name = "delta" if stream else "message"
-
-    token_decode = tokenizer.batch_decode(
-       tokens,
-       skip_special_tokens=True,
-       clean_up_tokenization_spaces=False
-    )
-    choice[key_name] = {"role": "assistant", "content": token_decode}
+    choice[key_name] = {"role": "assistant", "content": tokenizer.decode(tokens)}
   elif object_type == "text_completion":
     choice["text"] = tokenizer.decode(tokens)
   else:
diff --git a/exo/inference/pytorch/inference.py b/exo/inference/pytorch/inference.py
index 11f8eddb3..676e31620 100644
--- a/exo/inference/pytorch/inference.py
+++ b/exo/inference/pytorch/inference.py
@@ -14,6 +14,7 @@
 from exo.download.hf.hf_shard_download import HFShardDownloader
 
 from transformers import AutoTokenizer
+
 # llama
 from transformers.models.llama.modeling_llama import LlamaModel