Patch for rag/llm/cv_model.py, method describe():
build the prompt message list once, set the "type" field of the second
content entry of the first message to "text", and pass that list as
`messages` to the chat-completions call instead of calling self.prompt(b64) inline.

diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index 65f73d00ba4..4c4513dc2e2 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -293,9 +293,12 @@ def __init__(self, key, model_name="glm-4v", lang="Chinese", **kwargs):
 
     def describe(self, image, max_tokens=1024):
         b64 = self.image2base64(image)
+        prompt = self.prompt(b64)
+        prompt[0]["content"][1]["type"] = "text"
+
         res = self.client.chat.completions.create(
             model=self.model_name,
-            messages=self.prompt(b64),
+            messages=prompt,
             max_tokens=max_tokens,
         )
         return res.choices[0].message.content.strip(), res.usage.total_tokens