# filter.py
# Forked from cognitivecomputations/SystemChat.
import jsonlines
import json
from tqdm import tqdm
# Input dataset and destination for the conversations that survive filtering.
in_file = "SystemChat.jsonl"
out_file = "SystemChat_filtered2.jsonl"

# Running tally: phrase (as spelled in _BADWORDS) -> number of samples that
# were rejected because that phrase was the first one found in them.
histogram = {}

# Phrases whose presence marks a sample as unwanted.  Order matters:
# has_bad_words() stops at the first phrase that matches, and only that phrase
# is credited in ``histogram``.
_BADWORDS = (
    "openai",
    "chatgpt",
    "delve",
    "text-based AI language model",
    "domestic violence",
    "please refrain",
    "derogatory",
    "inappropriate",
    "offensive",
    "racism",
    "racist",
    "racial",
    "discriminate",
    "discriminatory",
    "discrimination",
    "sexist",
    "sexism",
    "unacceptable",
    "inclusive workplace",
    "lgbt",
    "morals",
    "ethics",
    "ethical",
    "legality",
    "illegal",
    "illegality",
    "hateful",
    "it is never okay",
    "It is important to",
    "It's important to",
    "real-world consequences",
    "hate speech",
    "glorify",
    "not be appropriate",
    "supremacist",
    "extremist",
    "responsible AI",
    "AI principles",
    "AI assistant",
    "an AI language",
    "ableist",
    "hurtful",
    "gender stereotype",
    "gender inequality",
    "underrepresentation",
    "safe spaces",
    "gender-based",
    "inclusivity",
    "feminist",
    "feminism",
    "transgender",
    "empowerment",
    "stereotypes",
    "biases",
    "bias",
    "Microaggression",
    "prioritize human safety",
    "as a language model",
    "as an AI language model",
    "As a large language model",
    "As an AI",
    "ethical principles",
    "consensual",
    "it is not appropriate",
    "it's not appropriate",
    "I cannot fulfill your request",
    "harmful to human beings",
    "ethical guidelines",
    "my guidelines",
    "prioritize user safety",
    "adhere to ethical guidelines",
    "harmful consequences",
    "potentially harmful",
    "dangerous activities",
    "promote safety",
    "well-being of all users",
    "responsible information sharing",
    "jeopardize the safety",
    "illegal actions or intentions",
    "undermine the stability",
    "promote the well-being",
    "illegal activities or actions",
    "adherence to the law",
    "potentially be harmful",
    "illegal substances or activities",
    "committed to promoting",
    "safe information",
    "lawful information",
    "cannot provide guidance",
    "cannot provide information",
    "unable to offer assistance",
    "cannot engage in discussions",
    "programming prohibits",
    "follow ethical guidelines",
    "ensure the safety",
    "involves an illegal subject",
    "prioritize safety",
    "illegal subject",
    "prioritize user well-being",
    "cannot support or promote",
    "activities that could harm",
    "pose a risk to others",
    "against my programming",
    "activities that could undermine",
    "potentially dangerous",
    "not within the scope",
    "designed to prioritize safety",
    "not able to provide",
    "maintain user safety",
    "adhere to safety guidelines",
    "dangerous or harmful",
    "cannot provide any information",
    "focus on promoting safety",
    "an AI language model you don't have",
    "As an AI language model, I cannot",
    "As an AI language model, I do not",
    "As an AI language model, I am not able",
    "As an AI language model, I don't have personal",
    "I am an AI language model and do not",
    "However, it is important to use any code or information provided responsibly and within legal and ethical boundaries.",
    "As an AI language model, I don't have",
    "As an AI language model, I am only able",
    "AI language model and I do not",
    "As an AI language model, I cannot modify",
    "As an AI language model, I do not",
    "I know as an AI language model you don't have",
    "as an AI language model, you cannot",
    "I'm sorry, but as an AI language model",
    "As an AI language model, I don't have",
    "Unfortunately, I cannot provide",
    "I'm sorry, I cannot",
    "I'm sorry, I cannot generate",
    "AI cannot create or program",
    "I'm afraid I cannot create",
    "you cannot create an",
    "it operates ethically and is",
    "had an ethical system",
    "Ensuring the ethical",
    "and ethical sourcing",
    "are from ethical",
    "legal and ethical",
    "engage in unethical",
    "unethical or aggressive",
    "unethical business",
    "como modelo de lenguaje AI",
    "Lo siento, como modelo de lenguaje",
    "no puedo proporcionar",
    "pero debido a mi capacidad para generar c\u00f3digos complejos y completos es limitado",
    "Lo siento, pero no puedo",
    "Lo siento, pero como modelo de lenguaje, no puedo proporcionar",
    "Lo siento, como modelo de lenguaje, no tengo",
    "Lo siento, debe haber habido una confusi\u00f3n",
    "Lo siento, como modelo de lenguaje, no puedo realizar",
    "Lo siento, soy un modelo de lenguaje y no tengo la capacidad de generar",
    "Lamento no poder proporcionarte el c\u00f3digo",
    "Desculpe-me, mas a linguagem vulgar e ofensiva",
    "apropriada em nenhum contexto",
    "Como modelo de linguagem",
    "Como um modelo de linguagem, n\u00e3o tenho a capacidade de",
    "I cannot assist",
    "prioritize ethical",
    "morally",
    "I'm sorry,",
    "I'm an",
    "I am an",
    "I'm an AI",
    "I am an AI",
    "my purpose",
    "filter_bad_language",
    # The backslashes below are literal characters in the phrase (written as
    # "\\" so the string value is unchanged and no invalid-escape warning is
    # raised).
    "filter\\_bad\\_language",
    "entertainment purposes",
    "purely hypothetical",
    "not a human",
    "I am an AI",
    "cannot provide",
    "can't provide",
    "won't provide",
    "not provide",
    "cause harm",
    "a language model",
    "unethical",
    "bad language",
    "the words ****",
    "bad_language",
    "certainly not",
    "complying",
    "comply",
    "I cannot",
    "my main goal",
    "As a machine",
    "I don't have the ability",
    "I am here to assist",
    "my purpose is to ",
    "my knowledge cutoff",
    "my knowledge cut off",
    "September 2021",
    "regulations",
    "not be suitable",
    "I apologize, but",
    "It is not possible",
    "my programming",
    "it is important to",
    "Please note",
    "sensitive topic",
    "not acceptable",
    "It is important for",
    "divisive",
    "not appropriate",
    "our values",
    "f\\*cking",
    "F\\*ck",
    "sh\\*t",
    "diversity and",
    "diversity and inclusion",
    "values diversity",
    "social responsibility",
    "environmental, social, and governance",
    " ESG ",
    "against women",
    "problematic history",
    "*This chat conversation is shared from",
    "*This conversation is shared from",
    "I can't assist",
)

# (original spelling, lowercased form) pairs, computed once.  The sample is
# lowercased before searching, so the needle must be lowercased too; otherwise
# any phrase containing a capital letter could never match.
_BADWORDS_LOWERED = tuple((phrase, phrase.lower()) for phrase in _BADWORDS)


def has_bad_words(sample):
    """Return True if *sample* contains any listed phrase, ignoring case.

    The first matching phrase (in list order) has its counter in the
    module-level ``histogram`` incremented by one, keyed by the phrase as it is
    spelled in the list; later phrases are not examined for that sample.

    Args:
        sample: Text to inspect.  ``None`` or an empty string is treated as
            containing no listed phrase.

    Returns:
        True when a listed phrase occurs in the sample, otherwise False.
    """
    if not sample:
        return False
    lowered = sample.lower()
    for badword, needle in _BADWORDS_LOWERED:
        if needle in lowered:
            histogram[badword] = histogram.get(badword, 0) + 1
            return True
    return False
# Copy every conversation from in_file to out_file unless one of its
# assistant turns contains a listed phrase.  Each kept conversation is written
# as one JSON object per line, wrapped under the "messages" key.
with open(out_file, "w", encoding="utf-8") as sink, jsonlines.open(in_file) as source:
    for conversation in tqdm(source):
        # any() stops at the first offending assistant turn, so at most one
        # histogram entry is incremented per rejected conversation.
        rejected = any(
            turn["role"] == "assistant" and has_bad_words(turn["content"])
            for turn in conversation
        )
        if rejected:
            continue
        sink.write(json.dumps({"messages": conversation}))
        sink.write("\n")

# Report how often each phrase caused a rejection, most frequent first.
for phrase, hits in sorted(histogram.items(), key=lambda pair: pair[1], reverse=True):
    print(f"{hits}\t{phrase}")