From f077b57f8be7e0b6dd0f4ac8c95dd192fdfc2fe6 Mon Sep 17 00:00:00 2001
From: KevinHuSh
Date: Thu, 30 May 2024 11:27:58 +0800
Subject: [PATCH] set ollama keep_alive (#985)

### What problem does this PR solve?

Sets `keep_alive=-1` on the Ollama chat calls so the model stays loaded in memory between requests instead of being unloaded after the server's default idle timeout. Also corrects the registered metadata for the BAAI/bge-reranker-v2-m3 model (re-rank tags and a 2048-token limit).

Fixes #980

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 api/db/init_data.py   | 4 ++--
 rag/llm/chat_model.py | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/api/db/init_data.py b/api/db/init_data.py
index 01b019ebbb..89e0cf4702 100644
--- a/api/db/init_data.py
+++ b/api/db/init_data.py
@@ -532,8 +532,8 @@ def init_llm_factory():
         {
             "fid": factory_infos[12]["name"],
             "llm_name": "BAAI/bge-reranker-v2-m3",
-            "tags": "LLM,CHAT,",
-            "max_tokens": 16385,
+            "tags": "RE-RANK,2k",
+            "max_tokens": 2048,
             "model_type": LLMType.RERANK.value
         },
     ]
diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index a9530fe61e..f730c6375e 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -303,7 +303,8 @@ def chat(self, system, history, gen_conf):
             response = self.client.chat(
                 model=self.model_name,
                 messages=history,
-                options=options
+                options=options,
+                keep_alive=-1
             )
             ans = response["message"]["content"].strip()
             return ans, response["eval_count"] + response.get("prompt_eval_count", 0)
@@ -325,7 +326,8 @@ def chat_streamly(self, system, history, gen_conf):
                 model=self.model_name,
                 messages=history,
                 stream=True,
-                options=options
+                options=options,
+                keep_alive=-1
             )
             for resp in response:
                 if resp["done"]:
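
Usage note (illustrative, not part of the patch): a minimal sketch of what `keep_alive=-1` means when calling the Ollama Python client directly. The host URL, model name, and message are placeholder assumptions; only the `keep_alive` argument mirrors the change above.

```python
# Minimal sketch, assuming a local Ollama server at the default port and a
# model named "llama3" (both placeholders, not taken from this patch).
from ollama import Client

client = Client(host="http://localhost:11434")

response = client.chat(
    model="llama3",
    messages=[{"role": "user", "content": "Hello"}],
    keep_alive=-1,  # -1 asks Ollama to keep the model loaded indefinitely
)

# The patched code reads the reply the same way: response["message"]["content"]
print(response["message"]["content"])
```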