-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpt3_correspondence_summarize_4.py
48 lines (31 loc) · 1.31 KB
/
gpt3_correspondence_summarize_4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import openai
import glob
import json
import util
from transformers import GPT2TokenizerFast
# Load the pretrained "gpt2" tokenizer once at import time; count_tokens reads
# this module-level object on every call.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
# Take the OpenAI API key from the environment. os.getenv returns None when
# OPENAI_API_KEY is not set, in which case no key is configured here.
openai.api_key = os.getenv("OPENAI_API_KEY")
def count_tokens(text: str) -> int:
    """Return how many token ids the module-level tokenizer yields for ``text``."""
    token_ids = tokenizer.encode(text)
    return len(token_ids)
# Key under data['gpt'] where the completion is stored. Its presence marks a
# resource as already processed, so a re-run skips the API call for that file.
SUMMARY_KEY = 'correspondence-summarize-1-sentences'

# Not referenced again in this script; the load is kept so the name stays
# defined and a missing id-set file still fails before any API call is made.
with open('group-ui-util/anthony-correspondence-id-sets.json', encoding='utf-8') as id_sets_file:
    page_order = json.load(id_sets_file)

# For every correspondence resource: ask the completion model to extract
# recipient, sender, date, a one-sentence summary and the organizations
# mentioned, then store the first returned choice back into the same file.
for resource in glob.glob('anthony-correspondence-resources/*.json'):
    print(resource)

    # Context manager closes the handle deterministically instead of leaving
    # it to garbage collection.
    with open(resource, encoding='utf-8') as resource_file:
        data = json.load(resource_file)

    # Create the 'gpt' section on first use (same effect as an explicit
    # "if 'gpt' not in data" check).
    gpt_section = data.setdefault('gpt', {})
    if SUMMARY_KEY in gpt_section:
        continue

    full_text = data['full_text']

    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=f"Using only the text below extract in full dictonary JSON format who this letter was sent to, who it was sent from, on what date, summarize the contents in one sentence, and extract the organizations mentioned, using the dictonary keys recipient, sender, date, contents, organizationsMentioned:\n---\n{full_text}\n---\n",
        temperature=0.25,
        max_tokens=506,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    gpt_section[SUMMARY_KEY] = response['choices'][0]

    # Serialize before opening for writing: opening with 'w' truncates the
    # file, so a serialization error must surface while the original content
    # is still intact on disk.
    serialized = json.dumps(data, indent=2)
    with open(resource, 'w', encoding='utf-8') as resource_file:
        resource_file.write(serialized)