From e8e96be609f32167c40acda1d89575f5dccfb855 Mon Sep 17 00:00:00 2001
From: qtencent7 <101330409+qtencent7@users.noreply.github.com>
Date: Sat, 30 Mar 2024 21:51:51 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E7=94=9F=E6=88=90?=
 =?UTF-8?q?=E5=AD=97=E5=85=B8=E5=87=BD=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore |  1 +
 main.py    | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 main.py
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..723ef36
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..4cff34d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,29 @@
+chat_list = [  # sample question/answer pairs; passed to generate_dict() in __main__
+    {"question": "It’s time to get up", "answer": "Get up soon"},
+    {"question": "I usually sleep late on Saturdays", "answer": "Hurry up"},
+    {"question": "It’s still early", "answer": "Did the alarm go off"}
+]
+# TODO: continue
+
+def generate_dict(corpus):
+    """Return the distinct words of a question/answer corpus as a list.
+
+    Every item of ``corpus`` must map 'question' and 'answer' to strings.
+    Words are whitespace-split and kept in first-seen order, so the list
+    is identical on every run (a set's order varies with str hashing).
+    """
+    vocab = {}  # plain dict as an ordered set; the values are unused
+    for pair in corpus:
+        for field in ("question", "answer"):
+            vocab.update((word, None) for word in pair[field].split())
+    return list(vocab)
+
+STOP_WORDS = {  # each value is appended to the generated word list in __main__
+    "PAD": "<PAD>",  # NOTE(review): PAD/SOS/EOS look like sequence marker
+    "SOS": "<SOS>",  # tokens rather than stop words — confirm the name
+    "EOS": "<EOS>"
+}
+if __name__ == "__main__":
+    # Build the word list from the sample corpus, then add the marker tokens.
+    vocabulary = generate_dict(chat_list)  # avoid rebinding the builtin `dict`
+    vocabulary.extend(STOP_WORDS.values())