Adjust text formatting per Chinese copywriting guidelines #1949

Status: Open · wants to merge 2 commits into base branch master.
104 changes: 52 additions & 52 deletions README.md

Large diffs are not rendered by default.
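The pattern repeated across every file below follows the Chinese copywriting guidelines (中文文案排版指北) named in the PR title: insert a half-width space between CJK characters and adjacent Latin letters or digits, and prefer full-width punctuation inside Chinese sentences. As a rough illustration of the spacing rule only, here is a minimal sketch; it is not part of this PR, and the helper name and regex are illustrative (a simplified take on tools like pangu):

```python
import re

# Illustrative only: the CJK range and helper name are assumptions,
# not code from this PR.
CJK = "\u4e00-\u9fff"

def add_cjk_latin_spacing(text: str) -> str:
    # CJK character followed by a Latin letter/digit: insert a space
    text = re.sub(rf"([{CJK}])([A-Za-z0-9])", r"\1 \2", text)
    # Latin letter/digit followed by a CJK character: insert a space
    return re.sub(rf"([A-Za-z0-9])([{CJK}])", r"\1 \2", text)

print(add_cjk_latin_spacing("Bot不支持处理TEXT类型的消息"))
# -> "Bot 不支持处理 TEXT 类型的消息"
```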

10 changes: 5 additions & 5 deletions bot/ali/ali_qwen_bot.py
@@ -90,7 +90,7 @@ def reply(self, query, context=None):
             return reply

         else:
-            reply = Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type))
+            reply = Reply(ReplyType.ERROR, "Bot 不支持处理{}类型的消息".format(context.type))
             return reply

     def reply_text(self, session: AliQwenSession, retry_count=0) -> dict:
@@ -103,7 +103,7 @@ def reply_text(self, session: AliQwenSession, retry_count=0) -> dict:
         try:
             prompt, history = self.convert_messages_format(session.messages)
             self.update_api_key_if_expired()
-            # NOTE 阿里百炼的call()函数未提供temperature参数,考虑到temperature和top_p参数作用相同,取两者较小的值作为top_p参数传入,详情见文档 https://help.aliyun.com/document_detail/2587502.htm
+            # NOTE 阿里百炼的 call() 函数未提供 temperature 参数,考虑到 temperature 和 top_p 参数作用相同,取两者较小的值作为 top_p 参数传入,详情见文档 https://help.aliyun.com/document_detail/2587502.htm
             response = broadscope_bailian.Completions().call(app_id=self.app_id(), prompt=prompt, history=history, top_p=min(self.temperature(), self.top_p()))
             completion_content = self.get_completion_content(response, self.node_id())
             completion_tokens, total_tokens = self.calc_tokens(session.messages, completion_content)
@@ -173,7 +173,7 @@ def convert_messages_format(self, messages) -> Tuple[str, List[ChatQaMessage]]:
         if user_content == '':
             raise Exception('no user message')
         if system_content != '':
-            # NOTE 模拟系统消息,测试发现人格描述以"你需要扮演ChatGPT"开头能够起作用,而以"你是ChatGPT"开头模型会直接否认
+            # NOTE 模拟系统消息,测试发现人格描述以"你需要扮演 ChatGPT"开头能够起作用,而以"你是 ChatGPT"开头模型会直接否认
             system_qa = ChatQaMessage(system_content, '好的,我会严格按照你的设定回答问题')
             history.insert(0, system_qa)
         logger.debug("[QWEN] converted qa messages: {}".format([item.to_dict() for item in history]))
@@ -186,15 +186,15 @@ def get_completion_content(self, response, node_id):
         text = response['Data']['Text']
         if node_id == '':
             return text
-        # TODO: 当使用流程编排创建大模型应用时,响应结构如下,最终结果在['finalResult'][node_id]['response']['text']中,暂时先这么写
+        # TODO: 当使用流程编排创建大模型应用时,响应结构如下,最终结果在 ['finalResult'][node_id]['response']['text'] 中,暂时先这么写
         # {
         #     'Success': True,
         #     'Code': None,
         #     'Message': None,
         #     'Data': {
         #         'ResponseId': '9822f38dbacf4c9b8daf5ca03a2daf15',
         #         'SessionId': 'session_id',
-        #         'Text': '{"finalResult":{"LLM_T7islK":{"params":{"modelId":"qwen-plus-v1","prompt":"${systemVars.query}${bizVars.Text}"},"response":{"text":"作为一个AI语言模型,我没有年龄,因为我没有生日。\n我只是一个程序,没有生命和身体。"}}}}',
+        #         'Text': '{"finalResult":{"LLM_T7islK":{"params":{"modelId":"qwen-plus-v1","prompt":"${systemVars.query}${bizVars.Text}"},"response":{"text":"作为一个 AI 语言模型,我没有年龄,因为我没有生日。\n我只是一个程序,没有生命和身体。"}}}}',
         #         'Thoughts': [],
         #         'Debug': {},
         #         'DocReferences': []
4 changes: 2 additions & 2 deletions bot/ali/ali_qwen_session.py
@@ -53,9 +53,9 @@ def calc_tokens(self):

 def num_tokens_from_messages(messages, model):
     """Returns the number of tokens used by a list of messages."""
-    # 官方token计算规则:"对于中文文本来说,1个token通常对应一个汉字;对于英文文本来说,1个token通常对应3至4个字母或1个单词"
+    # 官方 token 计算规则:"对于中文文本来说,1 个 token 通常对应一个汉字;对于英文文本来说,1 个 token 通常对应 3 至 4 个字母或 1 个单词"
     # 详情请产看文档:https://help.aliyun.com/document_detail/2586397.html
-    # 目前根据字符串长度粗略估计token数,不影响正常使用
+    # 目前根据字符串长度粗略估计 token 数,不影响正常使用
     tokens = 0
     for msg in messages:
         tokens += len(msg["content"])
2 changes: 1 addition & 1 deletion bot/baidu/baidu_unit_bot.py
@@ -6,7 +6,7 @@
 from bridge.reply import Reply, ReplyType


-# Baidu Unit对话接口 (可用, 但能力较弱)
+# Baidu Unit 对话接口 (可用,但能力较弱)
 class BaiduUnitBot(Bot):
     def reply(self, query, context=None):
         token = self.get_token()
4 changes: 2 additions & 2 deletions bot/baidu/baidu_wenxin.py
@@ -94,13 +94,13 @@ def reply_text(self, session: BaiduWenxinSession, retry_count=0):
             logger.warn("[BAIDU] Exception: {}".format(e))
             need_retry = False
             self.sessions.clear_session(session.session_id)
-            result = {"completion_tokens": 0, "content": "出错了: {}".format(e)}
+            result = {"completion_tokens": 0, "content": "出错了:{}".format(e)}
             return result

     def get_access_token(self):
         """
         使用 AK,SK 生成鉴权签名(Access Token)
-        :return: access_token,或是None(如果错误)
+        :return: access_token,或是 None(如果错误)
         """
         url = "https://aip.baidubce.com/oauth/2.0/token"
         params = {"grant_type": "client_credentials", "client_id": BAIDU_API_KEY, "client_secret": BAIDU_SECRET_KEY}
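The hunk above cuts off right after url and params. For context, a hedged sketch of how get_access_token presumably completes, based on Baidu's standard OAuth client-credentials flow; the repo's actual error handling may differ:

```python
import requests

def get_access_token(api_key: str, secret_key: str):
    """Sketch of the token exchange hinted at above; returns the
    access_token string, or None on failure (matching the docstring
    in the diff). Not verbatim code from this repository."""
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": api_key, "client_secret": secret_key}
    return requests.post(url, params=params).json().get("access_token")
```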
4 changes: 2 additions & 2 deletions bot/baidu/baidu_wenxin_session.py
@@ -14,7 +14,7 @@ class BaiduWenxinSession(Session):
     def __init__(self, session_id, system_prompt=None, model="gpt-3.5-turbo"):
         super().__init__(session_id, system_prompt)
         self.model = model
-        # 百度文心不支持system prompt
+        # 百度文心不支持 system prompt
         # self.reset()

     def discard_exceeding(self, max_tokens, cur_tokens=None):
@@ -47,7 +47,7 @@ def num_tokens_from_messages(messages, model):
     """Returns the number of tokens used by a list of messages."""
     tokens = 0
     for msg in messages:
-        # 官方token计算规则暂不明确: "大约为 token数为 "中文字 + 其他语种单词数 x 1.3"
+        # 官方 token 计算规则暂不明确: "大约为 token 数为 "中文字 + 其他语种单词数 x 1.3"
         # 这里先直接根据字数粗略估算吧,暂不影响正常使用,仅在判断是否丢弃历史会话的时候会有偏差
         tokens += len(msg["content"])
     return tokens
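The quoted official rule ("中文字 + 其他语种单词数 x 1.3") is easy to implement if a closer estimate is ever wanted; a hypothetical sketch, not part of this PR:

```python
import re

def estimate_wenxin_tokens(text: str) -> int:
    # Hypothetical implementation of the official rule quoted above:
    # one token per Chinese character, plus 1.3 tokens per word in
    # other languages. The repo deliberately uses len(content) instead,
    # which only skews history-truncation decisions.
    chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text))
    other_words = len(re.findall(r"[A-Za-z]+", text))
    return round(chinese_chars + other_words * 1.3)
```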
6 changes: 3 additions & 3 deletions bot/bot_factory.py
@@ -12,19 +12,19 @@ def create_bot(bot_type):
     :return: bot instance
     """
     if bot_type == const.BAIDU:
-        # 替换Baidu Unit为Baidu文心千帆对话接口
+        # 替换 Baidu Unit 为 Baidu 文心千帆对话接口
         # from bot.baidu.baidu_unit_bot import BaiduUnitBot
         # return BaiduUnitBot()
         from bot.baidu.baidu_wenxin import BaiduWenxinBot
         return BaiduWenxinBot()

     elif bot_type == const.CHATGPT:
-        # ChatGPT 网页端web接口
+        # ChatGPT 网页端 web 接口
         from bot.chatgpt.chat_gpt_bot import ChatGPTBot
         return ChatGPTBot()

     elif bot_type == const.OPEN_AI:
-        # OpenAI 官方对话模型API
+        # OpenAI 官方对话模型 API
         from bot.openai.open_ai_bot import OpenAIBot
         return OpenAIBot()
12 changes: 6 additions & 6 deletions bot/chatgpt/chat_gpt_bot.py
@@ -17,7 +17,7 @@
 from config import conf, load_config


-# OpenAI对话模型API (可用)
+# OpenAI 对话模型 API (可用)
 class ChatGPTBot(Bot, OpenAIImage):
     def __init__(self):
         super().__init__()
@@ -34,12 +34,12 @@ def __init__(self):
         self.sessions = SessionManager(ChatGPTSession, model=conf().get("model") or "gpt-3.5-turbo")
         self.args = {
             "model": conf().get("model") or "gpt-3.5-turbo",  # 对话模型的名称
-            "temperature": conf().get("temperature", 0.9),  # 值在[0,1]之间,越大表示回复越具有不确定性
+            "temperature": conf().get("temperature", 0.9),  # 值在 [0,1] 之间,越大表示回复越具有不确定性
             # "max_tokens":4096,  # 回复最大的字符数
             "top_p": conf().get("top_p", 1),
-            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2]之间,该值越大则更倾向于产生不同的内容
-            "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2]之间,该值越大则更倾向于产生不同的内容
-            "request_timeout": conf().get("request_timeout", None),  # 请求超时时间,openai接口默认设置为600,对于难问题一般需要较长时间
+            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2] 之间,该值越大则更倾向于产生不同的内容
+            "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2] 之间,该值越大则更倾向于产生不同的内容
+            "request_timeout": conf().get("request_timeout", None),  # 请求超时时间,openai 接口默认设置为 600,对于难问题一般需要较长时间
             "timeout": conf().get("request_timeout", None),  # 重试超时时间,在这个时间内,将会自动重试
         }

@@ -103,7 +103,7 @@ def reply(self, query, context=None):
             reply = Reply(ReplyType.ERROR, retstring)
             return reply
         else:
-            reply = Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type))
+            reply = Reply(ReplyType.ERROR, "Bot 不支持处理{}类型的消息".format(context.type))
             return reply

     def reply_text(self, session: ChatGPTSession, api_key=None, args=None, retry_count=0) -> dict:
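The self.args dict above maps directly onto the keyword arguments of the legacy openai-python (<1.0) chat endpoint; a hedged sketch of how reply_text presumably consumes it, with the repo's retry and error handling omitted and the return shape assumed from the surrounding code:

```python
import openai

def reply_text_sketch(messages, args, api_key=None):
    # Sketch only: forwards the args built in __init__ straight into
    # the legacy (<1.0) openai-python ChatCompletion endpoint.
    response = openai.ChatCompletion.create(api_key=api_key, messages=messages, **args)
    return {
        "total_tokens": response["usage"]["total_tokens"],
        "completion_tokens": response["usage"]["completion_tokens"],
        "content": response["choices"][0]["message"]["content"],
    }
```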
12 changes: 6 additions & 6 deletions bot/claude/claude_ai_bot.py
@@ -47,7 +47,7 @@ def reply(self, query, context: Context = None) -> Reply:
             reply = Reply(ReplyType.ERROR, res)
             return reply
         else:
-            reply = Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type))
+            reply = Reply(ReplyType.ERROR, "Bot 不支持处理{}类型的消息".format(context.type))
             return reply

     def get_organization_id(self):
@@ -71,10 +71,10 @@ def get_organization_id(self):
         except:
             if "App unavailable" in response.text:
                 logger.error("IP error: The IP is not allowed to be used on Claude")
-                self.error = "ip所在地区不被claude支持"
+                self.error = "ip 所在地区不被 claude 支持"
             elif "Invalid authorization" in response.text:
                 logger.error("Cookie error: Invalid authorization of claude, check cookie please.")
-                self.error = "无法通过claude身份验证,请检查cookie"
+                self.error = "无法通过 claude 身份验证,请检查 cookie"
             return None
         return uuid

@@ -93,8 +93,8 @@ def check_cookie(self):

     def create_new_chat(self, con_uuid):
         """
-        新建claude对话实体
-        :param con_uuid: 对话id
+        新建 claude 对话实体
+        :param con_uuid: 对话 id
         :return:
         """
         url = f"https://claude.ai/api/organizations/{self.org_uuid}/chat_conversations"
@@ -193,7 +193,7 @@ def _chat(self, query, context, retry_count=0) -> Reply:

         if "rate limi" in reply_content:
             logger.error("rate limit error: The conversation has reached the system speed limit and is synchronized with Cladue. Please go to the official website to check the lifting time")
-            return Reply(ReplyType.ERROR, "对话达到系统速率限制,与cladue同步,请进入官网查看解除限制时间")
+            return Reply(ReplyType.ERROR, "对话达到系统速率限制,与 cladue 同步,请进入官网查看解除限制时间")
         logger.info(f"[CLAUDE] reply={reply_content}, total_tokens=invisible")
         self.sessions.session_reply(reply_content, session_id, 100)
         return Reply(ReplyType.TEXT, reply_content)
2 changes: 1 addition & 1 deletion bot/claude/claude_ai_session.py
@@ -5,5 +5,5 @@ class ClaudeAiSession(Session):
     def __init__(self, session_id, system_prompt=None, model="claude"):
         super().__init__(session_id, system_prompt)
         self.model = model
-        # claude逆向不支持role prompt
+        # claude 逆向不支持 role prompt
         # self.reset()
2 changes: 1 addition & 1 deletion bot/claudeapi/claude_api_bot.py
@@ -19,7 +19,7 @@
 user_session = dict()


-# OpenAI对话模型API (可用)
+# OpenAI 对话模型 API (可用)
 class ClaudeAPIBot(Bot, OpenAIImage):
     def __init__(self):
         super().__init__()
4 changes: 2 additions & 2 deletions bot/dashscope/dashscope_bot.py
@@ -19,7 +19,7 @@
     "qwen-max": dashscope.Generation.Models.qwen_max,
     "qwen-bailian-v1": dashscope.Generation.Models.bailian_v1
 }
-# ZhipuAI对话模型API
+# ZhipuAI 对话模型 API
 class DashscopeBot(Bot):
     def __init__(self):
         super().__init__()
@@ -70,7 +70,7 @@ def reply(self, query, context=None):
             logger.debug("[DASHSCOPE] reply {} used 0 tokens.".format(reply_content))
             return reply
         else:
-            reply = Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type))
+            reply = Reply(ReplyType.ERROR, "Bot 不支持处理{}类型的消息".format(context.type))
             return reply

     def reply_text(self, session: DashscopeSession, retry_count=0) -> dict:
4 changes: 2 additions & 2 deletions bot/gemini/google_gemini_bot.py
@@ -16,13 +16,13 @@
 from bot.baidu.baidu_wenxin_session import BaiduWenxinSession


-# OpenAI对话模型API (可用)
+# OpenAI 对话模型 API (可用)
 class GoogleGeminiBot(Bot):

     def __init__(self):
         super().__init__()
         self.api_key = conf().get("gemini_api_key")
-        # 复用文心的token计算方式
+        # 复用文心的 token 计算方式
         self.sessions = SessionManager(BaiduWenxinSession, model=conf().get("model") or "gpt-3.5-turbo")

     def reply(self, query, context: Context = None) -> Reply:
14 changes: 7 additions & 7 deletions bot/linkai/link_ai_bot.py
@@ -41,7 +41,7 @@ def reply(self, query, context: Context = None) -> Reply:
             reply = Reply(ReplyType.ERROR, res)
             return reply
         else:
-            reply = Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type))
+            reply = Reply(ReplyType.ERROR, "Bot 不支持处理{}类型的消息".format(context.type))
             return reply

     def _chat(self, query, context, retry_count=0) -> Reply:
@@ -86,11 +86,11 @@ def _chat(self, query, context, retry_count=0) -> Reply:
         body = {
             "app_code": app_code,
             "messages": session_message,
-            "model": model,  # 对话模型的名称, 支持 gpt-3.5-turbo, gpt-3.5-turbo-16k, gpt-4, wenxin, xunfei
+            "model": model,  # 对话模型的名称,支持 gpt-3.5-turbo, gpt-3.5-turbo-16k, gpt-4, wenxin, xunfei
             "temperature": conf().get("temperature"),
             "top_p": conf().get("top_p", 1),
-            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2]之间,该值越大则更倾向于产生不同的内容
-            "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2]之间,该值越大则更倾向于产生不同的内容
+            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2] 之间,该值越大则更倾向于产生不同的内容
+            "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2] 之间,该值越大则更倾向于产生不同的内容
             "session_id": session_id,
             "sender_id": session_id,
             "channel_type": conf().get("channel_type", "wx")
@@ -250,11 +250,11 @@ def reply_text(self, session: ChatGPTSession, app_code="", retry_count=0) -> dict:
         body = {
             "app_code": app_code,
             "messages": session.messages,
-            "model": conf().get("model") or "gpt-3.5-turbo",  # 对话模型的名称, 支持 gpt-3.5-turbo, gpt-3.5-turbo-16k, gpt-4, wenxin, xunfei
+            "model": conf().get("model") or "gpt-3.5-turbo",  # 对话模型的名称,支持 gpt-3.5-turbo, gpt-3.5-turbo-16k, gpt-4, wenxin, xunfei
             "temperature": conf().get("temperature"),
             "top_p": conf().get("top_p", 1),
-            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2]之间,该值越大则更倾向于产生不同的内容
-            "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2]之间,该值越大则更倾向于产生不同的内容
+            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2] 之间,该值越大则更倾向于产生不同的内容
+            "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2] 之间,该值越大则更倾向于产生不同的内容
         }
         if self.args.get("max_tokens"):
             body["max_tokens"] = self.args.get("max_tokens")
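Both hunks assemble the same request body; the diff does not show where it is sent, but presumably it is POSTed to the LinkAI completion endpoint roughly as below. The URL, header, and timeout are assumptions for illustration, not confirmed by this diff:

```python
import requests

def post_linkai_completion(body: dict, api_key: str) -> dict:
    # Assumed endpoint and auth header, for illustration only; the
    # hunks above only show the request body being built.
    res = requests.post(
        "https://api.link-ai.tech/v1/chat/completions",
        json=body,
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=180,
    )
    return res.json()
```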
6 changes: 3 additions & 3 deletions bot/moonshot/moonshot_bot.py
@@ -14,7 +14,7 @@
 import requests


-# ZhipuAI对话模型API
+# ZhipuAI 对话模型 API
 class MoonshotBot(Bot):
     def __init__(self):
         super().__init__()
@@ -76,7 +76,7 @@ def reply(self, query, context=None):
             logger.debug("[MOONSHOT_AI] reply {} used 0 tokens.".format(reply_content))
             return reply
         else:
-            reply = Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type))
+            reply = Reply(ReplyType.ERROR, "Bot 不支持处理{}类型的消息".format(context.type))
             return reply

     def reply_text(self, session: MoonshotSession, args=None, retry_count=0) -> dict:
@@ -121,7 +121,7 @@ def reply_text(self, session: MoonshotSession, args=None, retry_count=0) -> dict:
             logger.warn(f"[MOONSHOT_AI] do retry, times={retry_count}")
             need_retry = retry_count < 2
         elif res.status_code == 401:
-            result["content"] = "授权失败,请检查API Key是否正确"
+            result["content"] = "授权失败,请检查 API Key 是否正确"
         elif res.status_code == 429:
             result["content"] = "请求过于频繁,请稍后再试"
             need_retry = retry_count < 2
10 changes: 5 additions & 5 deletions bot/openai/open_ai_bot.py
@@ -17,7 +17,7 @@
 user_session = dict()


-# OpenAI对话模型API (可用)
+# OpenAI 对话模型 API (可用)
 class OpenAIBot(Bot, OpenAIImage):
     def __init__(self):
         super().__init__()
@@ -31,12 +31,12 @@ def __init__(self):
         self.sessions = SessionManager(OpenAISession, model=conf().get("model") or "text-davinci-003")
         self.args = {
             "model": conf().get("model") or "text-davinci-003",  # 对话模型的名称
-            "temperature": conf().get("temperature", 0.9),  # 值在[0,1]之间,越大表示回复越具有不确定性
+            "temperature": conf().get("temperature", 0.9),  # 值在 [0,1] 之间,越大表示回复越具有不确定性
             "max_tokens": 1200,  # 回复最大的字符数
             "top_p": 1,
-            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2]之间,该值越大则更倾向于产生不同的内容
-            "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2]之间,该值越大则更倾向于产生不同的内容
-            "request_timeout": conf().get("request_timeout", None),  # 请求超时时间,openai接口默认设置为600,对于难问题一般需要较长时间
+            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2] 之间,该值越大则更倾向于产生不同的内容
+            "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2] 之间,该值越大则更倾向于产生不同的内容
+            "request_timeout": conf().get("request_timeout", None),  # 请求超时时间,openai 接口默认设置为 600,对于难问题一般需要较长时间
             "timeout": conf().get("request_timeout", None),  # 重试超时时间,在这个时间内,将会自动重试
             "stop": ["\n\n\n"],
         }