# Reconstructed from a git format-patch that had been collapsed onto one line.
# Provenance: commit e8e96be609f32167c40acda1d89575f5dccfb855,
# subject "feat: add dictionary-generation function".
# The same patch also created a .gitignore containing the single entry ".idea".

# Toy question/answer corpus used to build the vocabulary.
chat_list = [
    {"question": "It’s time to get up", "answer": "Get up soon"},
    {"question": "I usually sleep late on Saturdays", "answer": "Hurry up"},
    {"question": "It’s still early", "answer": "Did the alarm go off"},
]
# TODO: continue (original note; the remaining work is not specified)


def generate_dict(corpus):
    """Return the unique whitespace-separated words found in *corpus*.

    Args:
        corpus: Iterable of mappings, each with a ``"question"`` and an
            ``"answer"`` string.

    Returns:
        A new list of the distinct words, in order of first occurrence
        (the question is scanned before the answer within each entry).
        An empty corpus yields an empty list.

    Raises:
        KeyError: If an entry lacks the ``"question"`` or ``"answer"`` key.
    """
    # A dict is used as an insertion-ordered set so the resulting word order
    # is reproducible across runs; iterating a plain set of strings is not,
    # because string hashing is randomised per interpreter process.
    seen = {}
    for entry in corpus:
        for field in ("question", "answer"):
            # update() keeps the position of keys that are already present.
            seen.update(dict.fromkeys(entry[field].split()))
    return list(seen)


# Special tokens appended after the corpus vocabulary.
# NOTE(review): every value is an empty string in the source; confirm whether
# distinct marker strings were intended here.
STOP_WORDS = {
    "PAD": "",
    "SOS": "",
    "EOS": "",
}

if __name__ == "__main__":
    vocabulary = generate_dict(chat_list)
    # Append the special-token strings after the corpus words.
    vocabulary.extend(STOP_WORDS.values())