
[ref] optimize the API calling of InternLM2-chat-7b #54

Merged · 1 commit · Jan 26, 2024
openaoe/backend/model/internlm.py (4 changes: 2 additions & 2 deletions)
@@ -20,11 +20,11 @@ class InternlmChatCompletionBody(BaseModel):
     prompt: Optional[str]
     messages: List[Context]
     temperature: Optional[float] = 0.7
-    top_p: Optional[float] = 1
+    top_p: Optional[float] = 0.8
     n: Optional[int] = 1
     stop: Optional[Union[str, List]] = "false"
     presence_penalty: Optional[float] = 0
-    frequency_penalty: Optional[float] = 0
+    frequency_penalty: Optional[float] = 0.5
     user: Optional[str] = NOT_GIVEN
     stream: Optional[bool] = False
     timeout: Optional[int] = 600
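The two default changes tighten InternLM2-chat-7b's sampling behavior: top_p drops from 1 to 0.8, so nucleus sampling draws from the top 80% of the probability mass instead of the full distribution, and frequency_penalty rises from 0 to 0.5, which discourages verbatim repetition. A minimal sketch of how the new defaults surface, assuming Pydantic and trimming the repo-specific fields (stop, user, max_tokens); Context here is a stand-in for the project's own message type:

from typing import List, Optional
from pydantic import BaseModel

class Context(BaseModel):
    # stand-in for openaoe's Context message type
    role: str
    content: str

class InternlmChatCompletionBody(BaseModel):
    prompt: Optional[str] = None
    messages: List[Context]
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.8              # was 1: sample from the top 80% mass
    n: Optional[int] = 1
    presence_penalty: Optional[float] = 0
    frequency_penalty: Optional[float] = 0.5  # was 0: penalize repeated tokens
    stream: Optional[bool] = False
    timeout: Optional[int] = 600

# Callers that omit the fields pick up the new defaults automatically:
body = InternlmChatCompletionBody(messages=[Context(role="user", content="Hi")])
assert body.top_p == 0.8 and body.frequency_penalty == 0.5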
openaoe/backend/service/service_internlm.py (70 changes: 41 additions & 29 deletions)
@@ -42,7 +42,6 @@ def chat_completion_v1(request, body: InternlmChatCompletionBody):
         "top_p": body.top_p,
         "n": body.n,
         "max_tokens": body.max_tokens,
-        "stop": "false",
         "stream": body.stream,
         "presence_penalty": body.presence_penalty,
         "frequency_penalty": body.frequency_penalty,
@@ -73,40 +72,53 @@ async def event_generator_json(count: int):
     while count > 0:
         count -= 1
         stop_flag = False
-        response = ""
         if await request.is_disconnected():
             break
         try:
-            res = requests.post(url, headers=headers, json=data)
-            res_data = res.text.replace("data: ", "")
-            json_strings = res_data.split("\n\n")
-            for chunk in json_strings:
-                try:
-                    json_data = json.loads(chunk)
-                    choice = json_data["choices"][0]
-                    if choice['finish_reason']:
-                        stop_flag = True
-                    if 'content' not in choice['delta']:
-                        continue
-                    s = choice["delta"]["content"]
-                    if s:
-                        response += s
-                        dict_item = {
-                            "success": "true",
-                            "msg": s
-                        }
-                        yield json.dumps(dict_item, ensure_ascii=False)
-                    if stop_flag:
-                        break
-                except json.JSONDecodeError as e:
-                    print(f"JSON Parse Error: {e}")
-
-            if stop_flag:
-                break
+            res = requests.post(url, headers=headers, json=data, stream=True)
+            logger.debug(f"url={url}, headers={headers}, body={data}")
+            if res:
+                for chunk in res.iter_content(chunk_size=512, decode_unicode=True):
+                    logger.debug(f"chunk: {chunk}")
+                    res_data = chunk.replace("data: ", "")
+                    try:
+                        json_data = json.loads(res_data)
+                        logger.debug(f"json_data: {json_data}")
+                        if json_data.get("object") == "error":
+                            error_msg = json_data.get("message")
+                            dict_item = {
+                                "success": "true",
+                                "msg": f"LMDeploy: {error_msg} "
+                            }
+                            yield json.dumps(dict_item, ensure_ascii=False)
+                            stop_flag = True
+                        choices = json_data.get("choices")
+                        if not choices:
+                            continue
+                        choice = choices[0]
+                        if choice.get('finish_reason'):
+                            stop_flag = True
+                        if 'content' not in choice['delta']:
+                            continue
+                        s = choice["delta"]["content"]
+                        logger.debug(f"content={s}")
+                        if s:
+                            dict_item = {
+                                "success": "true",
+                                "msg": s
+                            }
+                            yield json.dumps(dict_item, ensure_ascii=False)
+                        if stop_flag:
+                            break
+                    except Exception as e:
+                        logger.error(f"Error: {e}")
+                if stop_flag:
+                    break
         except Exception as e:
             logger.error(f"{e}")
             yield json.dumps({
                 "success": "false",
-                "msg": str(e)
+                "msg": f"from backend: {str(e)}"
             })
             break

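The rewritten generator is the core of the PR: instead of blocking on the full response body and splitting the buffered SSE text on blank lines after the fact, it passes stream=True to requests.post and forwards each delta to the client as it arrives, and it now surfaces in-stream LMDeploy error objects rather than swallowing them. One caveat is that iter_content(chunk_size=512) splits on byte boundaries, so a JSON event can straddle two chunks and fail to parse. A minimal standalone sketch of the same pattern that sidesteps this by re-assembling events on line boundaries with iter_lines(); the URL, port, and model name are assumptions (LMDeploy's api_server commonly listens on 23333), not values from this repo:

import json
import requests

URL = "http://localhost:23333/v1/chat/completions"  # assumed LMDeploy endpoint
payload = {
    "model": "internlm2-chat-7b",                   # assumed model name
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": True,
}

with requests.post(URL, json=payload, stream=True, timeout=600) as res:
    for line in res.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue                            # skip blank separators and keep-alives
        body = line[len("data: "):]
        if body.strip() == "[DONE]":
            break                               # OpenAI-style end-of-stream sentinel
        event = json.loads(body)
        if event.get("object") == "error":      # LMDeploy reports errors in-stream
            print("server error:", event.get("message"))
            break
        choice = event["choices"][0]
        if choice.get("delta", {}).get("content"):
            print(choice["delta"]["content"], end="", flush=True)
        if choice.get("finish_reason"):
            break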