generated from othneildrew/Best-README-Template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
75 lines (52 loc) · 3.06 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import sys
import os
project_root = os.path.dirname(os.path.abspath(__file__))
sys.path.append(project_root)
from autodata import SubjectGenerator, QA_Generator, TripleExtractor, Interrupter, TwoHopQuestionGenerator
def subject_generation(field, thing, subject_number):
    """Run a ``SubjectGenerator`` and return whatever its ``generate()`` yields.

    Args:
        field: Forwarded unchanged as the first constructor argument.
        thing: Forwarded unchanged as the second constructor argument.
        subject_number: Forwarded unchanged as the third constructor argument.

    Returns:
        The result of ``SubjectGenerator(...).generate()``. The script entry
        point in this file iterates over it, so it is presumably a collection
        of subjects -- confirm against ``autodata``.
    """
    return SubjectGenerator(field, thing, subject_number).generate()
def QA_generation(subject, question_type):
    """Run a ``QA_Generator`` for *subject* and return its ``generate()`` result.

    Args:
        subject: Forwarded unchanged as the first constructor argument.
        question_type: Forwarded unchanged as the second constructor argument.

    Returns:
        The result of ``QA_Generator(...).generate()``. ``main`` in this file
        reads the ``"question"`` and ``"answer"`` keys from it.
    """
    return QA_Generator(subject, question_type).generate()
def triple_extraction(subject, question, answer):
    """Run a ``TripleExtractor`` and return the result of its ``extract()`` call.

    Args:
        subject: Forwarded unchanged as the first constructor argument.
        question: Forwarded unchanged as the second constructor argument.
        answer: Forwarded unchanged as the third constructor argument.

    Returns:
        The result of ``TripleExtractor(...).extract()``. ``main`` in this
        file treats it as a string and keeps only its last line.
    """
    return TripleExtractor(subject, question, answer).extract()
def interrupt(triple, subject, relation):
    """Run an ``Interrupter`` and return the result of its ``interrupt()`` call.

    Args:
        triple: Forwarded unchanged as the first constructor argument.
        subject: Forwarded unchanged as the second constructor argument.
        relation: Forwarded unchanged as the third constructor argument.

    Returns:
        The result of ``Interrupter(...).interrupt()``. ``main`` in this file
        treats it as a string and keeps only its last line.
    """
    return Interrupter(triple, subject, relation).interrupt()
def two_hop_question_generation(firstTriple, secondTriple):
    """Run a ``TwoHopQuestionGenerator`` and return its ``generate()`` result.

    Args:
        firstTriple: Forwarded unchanged as the first constructor argument.
        secondTriple: Forwarded unchanged as the second constructor argument.

    Returns:
        The result of ``TwoHopQuestionGenerator(...).generate()``. ``main`` in
        this file treats it as a string and keeps only its last line.
    """
    return TwoHopQuestionGenerator(firstTriple, secondTriple).generate()
def return_element(triple, index):
    """Return one component of a parenthesised, comma-separated triple string.

    The string is trimmed, leading/trailing parentheses are removed, the
    remainder is split on commas, and the selected piece is returned with
    surrounding whitespace removed (so ``"(A, rel, B)"`` yields ``"rel"`` for
    index 1 rather than ``" rel"``).

    Args:
        triple: Text such as ``"(subject, relation, object)"``.
        index: Position of the wanted component; negative indices count from
            the end, as with any list.

    Returns:
        The selected component as a string without surrounding whitespace.

    Raises:
        IndexError: If *index* is out of range for the number of
            comma-separated components.

    Note:
        Splitting is purely on ``","``; a component that itself contains a
        comma is split into several pieces.
    """
    # Trim whitespace first: strip('()') only removes characters at the very
    # ends, so a trailing space or carriage return would otherwise leave the
    # closing parenthesis attached to the last element.
    clean_string = triple.strip().strip('()')
    # Components are usually written "a, b, c"; drop the padding around each.
    elements = [element.strip() for element in clean_string.split(',')]
    return elements[index]
def _last_line(text):
    """Return the last line of *text* with surrounding whitespace removed."""
    # Trim before splitting so a trailing newline in the generated text does
    # not produce an empty final line (which would later fail to parse as a
    # triple); trim again so the chosen line carries no stray padding.
    return text.strip().split("\n")[-1].strip()


def main(first_subject, question_type):
    """Build one two-hop record starting from *first_subject*.

    Steps performed:
      1. Generate a question/answer pair about ``first_subject`` and extract a
         triple from it; element 1 of that triple is taken as the relation.
      2. Generate a second question/answer pair using the first answer as the
         subject, and extract a second triple the same way.
      3. Pass the second triple to ``interrupt`` and read elements 0 and 2 of
         the result as the replacement ("new") targets.
         NOTE(review): presumably this produces a counterfactual triple --
         confirm against ``autodata.Interrupter``.
      4. Generate a two-hop question from the two triples.

    For every generator output that is parsed, only the last non-empty line
    is used.

    Args:
        first_subject: Subject of the first hop.
        question_type: Passed to ``QA_generation`` for both hops.

    Returns:
        A dict with keys ``"single_hop_prompt"``, ``"first_subject"``,
        ``"first_relation"``, ``"two_hop_prompt"``, ``"target_true"`` and
        ``"target_new"``; the last two are nested dicts holding the single-hop
        and two-hop targets.

    Raises:
        KeyError: If a generated QA result lacks ``"question"`` or ``"answer"``.
        IndexError: If a triple has too few comma-separated elements.
    """
    # First hop: question about the starting subject.
    first_QA = QA_generation(first_subject, question_type)
    first_question = first_QA["question"]
    first_answer = first_QA["answer"]
    first_triple = _last_line(
        triple_extraction(first_subject, first_question, first_answer)
    )
    first_relation = return_element(first_triple, 1)

    # Second hop: the first answer becomes the new subject.
    second_QA = QA_generation(first_answer, question_type)
    second_question = second_QA["question"]
    second_answer = second_QA["answer"]
    second_triple = _last_line(
        triple_extraction(first_answer, second_question, second_answer)
    )
    second_relation = return_element(second_triple, 1)

    # Altered version of the second triple supplies the "new" targets.
    interrupted_triple = _last_line(
        interrupt(second_triple, first_answer, second_relation)
    )
    single_hop_target_new = return_element(interrupted_triple, 0)
    two_hop_target_new = return_element(interrupted_triple, 2)

    two_hop_question = _last_line(
        two_hop_question_generation(first_triple, second_triple)
    )

    return {
        "single_hop_prompt": first_question,
        "first_subject": first_subject,
        "first_relation": first_relation,
        "two_hop_prompt": two_hop_question,
        "target_true": {
            "single_hop_target_true": first_answer,
            "two_hop_target_true": second_answer,
        },
        "target_new": {
            "single_hop_target_new": single_hop_target_new,
            "two_hop_target_new": two_hop_target_new,
        },
    }
if __name__ == "__main__":
    # Example configuration -- edit these four values to target another domain.
    field = "history"  # field of knowledge, e.g. "history", "biology"
    thing = "events"  # type of subjects, e.g. "events", "chemical compounds"
    subject_number = 1  # number of subjects to generate
    question_type = "historical"  # type of question, e.g. "scientific"

    # Generate the starting subjects, then print one record per subject.
    first_subjects = subject_generation(field, thing, subject_number)
    for first_subject in first_subjects:
        record = main(first_subject, question_type)
        print(record)