-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompression.py
159 lines (124 loc) · 9.86 KB
/
compression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import openai
from openai import OpenAI
import json
import logging
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
retry_if_exception_type
)
class LongFormCompression:
def __init__(self, api_key: str, gpt_model: str = "gpt-4-turbo", max_tokens: int = 1024, lang: str = "English"):
self.model = gpt_model
self.max_tokens = max_tokens
self.client = OpenAI(api_key=api_key)
self.lang = lang
@retry(
retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.RateLimitError, openai.InternalServerError, openai.Timeout)),
wait=wait_random_exponential(multiplier=1, max=60),
stop=stop_after_attempt(10))
def llm_query(self, system_message: str = "You are a helpful assistant.", prompt: str = "Return YEET.", print_output: bool = False):
"""
Generate the response from the GPT model.
:param system_message: the system message for the model
:param prompt: the prompt for the model
:param print_output: whether to print the output
:return: the response from the model
"""
# API reference: https://platform.openai.com/docs/api-reference/chat/create
response = self.client.chat.completions.create(model=self.model, # GPT model
max_tokens=self.max_tokens, # Maximum tokens in the prompt AND response
response_format={ "type": "json_object" }, # Return the response as a JSON object
messages=[
{"role": "system", "content": system_message},
{"role": "user", "content": prompt},
])
if response.choices[0].finish_reason != "stop":
print(f"The response was not properly generated. The reason for the early stop was: {response.choices[0].finish_reason}.")
return None
if print_output:
print(response)
# Return the response
return response.choices[0].message.content
def managed_memory(self, observation: str = None, prev_context: str = None, lmax: int = 200):
"""
Implementation of long context managed memory using an LLM.
Here, the context is iteratively compressed to a fixed size, following Chain of Density summarization.
This compressed context is later used to make a more informed translation.
:param observation: the most recent observation from the translation
:param prev_context: the previous context
:param lmax: the maximum length of the context
:return: the compressed context in plain text
"""
if prev_context is None: # Avoid using prev_context on first call when it's not available and replace with objective.
prev_context = "There is no previous context. This is the first translation."
system_message = f"""Help me keep track of all relevant details while I perform a translation by creating a new summary using an existing one and new information.
The translation is a Japanese to {self.lang} translation and I want you to help me remember everything important about the story."""
prompt_old = f"""Existing Summary from the previous Translation: {prev_context}
The most recent Observation from the Japanese text was: {observation}
You will generate new increasingly concise, entity-dense summaries based on the above Existing Summary and most recent Observation.
Perform the following 2 steps 3 times.
Step 1. If possible, identify 1-3 Informative Entities (";" delimited) from the most recent Observation which are missing from the Existing Summary.
Step 2. Write a new, denser summary of identical length which covers every entity, action, and detail from the previous Existing Summary plus the Informative Entities from the Observation.
An Informative Entity is:
- Relevant: to the translation's unfolding narrative.
- Specific: descriptive yet concise (5 words or fewer).
- Novel: not in the previous summary.
- Faithful: an accurate, detailed reflection of the translation.
- Anywhere: can be derived from any part of the translation.
Guidelines:
- The first of the three summaries must be long (but less than ~{lmax} words) yet highly non-specific, containing little information beyond the entities marked as missing. Use verbose language and fillers (e.g., "In this part of the translation, the main character encounters ...") to reach ~{lmax} words.
- Make every word count: rewrite the previous summary to improve flow and make space for additional Informative Entities.
- Make space with fusion, compression, and removal of uninformative phrases like "the scenario presents".
- The summaries should become highly dense and concise yet self-contained, e.g., easily understood without referencing the fact that a translation is being performed, and contain all information of the narrative thus far.
- Informative Entities can appear anywhere in the new summary.
- Only drop the least relevant Informative Entities from the previous summary if the summary length exceeds ~{lmax} words. Otherwise carry all previous Informative Entities to the new summary.
Answer in JSON. The JSON should be a list (length 3) of dictionaries whose keys are "Informative_Entities" and "Denser_Summary". Each of these pairs should have a key from 1 to 3.
"""
mid_prompt = f"""Here is a summary of the story so far: {prev_context}
Here is the most recent part of the story: {observation}
You will generate a new summary that contains information from the story so far and the most recent part.
Guidelines:
- The summary must be shorter than ~{lmax} words.
- Make every word count
- Make space with fusion, compression, and removal of uninformative phrases like "the scenario presents".
- The summary should be highly dense and concise yet self-contained, e.g., easily understood without referencing the fact that a translation is being performed, and contain all information of the narrative thus far.
- Only drop the least relevant information from the previous summary if the summary length exceeds ~{lmax} words. Otherwise carry all previous information to the new summary.
Answer in JSON. The JSON should be a dictionaries with key "Denser_Summary" containing the new summary.
"""
prompt = f"""Existing Summary from the previous Translation: {prev_context}
The most recent Observation from the {self.lang} translation was: {observation}
You will generate new increasingly concise, entity-dense summaries based on the above Existing Summary and most recent Observation.
Keep the summaries in {self.lang}.
You will create 3 summaries. You will create each of them by following the following two setps:
- Step 1. If possible, identify 1-3 Informative Entities (";" delimited) from the most recent Observation which are missing from the Existing Summary.
- Step 2. Write a new, denser summary of identical length which covers every entity, action, and detail from the previous Existing Summary plus the Informative Entities from the Observation.
An Informative Entity is:
- Relevant: to the translation's unfolding narrative.
- Specific: descriptive yet concise (10 words or fewer).
- Novel: not in the previous summary.
- Faithful: an accurate, detailed reflection of the translation.
Guidelines:
- The first of the three summaries must be long (but less than ~{lmax} words) yet highly non-specific, containing little information beyond the entities marked as missing. Use verbose language and fillers (e.g., "In this part of the translation, the main character encounters ...") to reach ~{lmax} words.
- Make every word count: rewrite the previous summary to improve flow and make space for additional Informative Entities.
- Make space with fusion, compression, and removal of uninformative phrases like "the scenario presents".
- The summaries should become highly dense and concise yet self-contained, e.g., easily understood without referencing the fact that a translation is being performed, and contain all information of the narrative thus far.
- Informative Entities can appear anywhere in the new summary.
- Only drop the least relevant Informative Entities from the previous summary if the summary length exceeds ~{lmax} words. Otherwise carry all previous Informative Entities to the new summary.
Answer in JSON. The JSON should be a list (length 3) of dictionaries under the key "summaries". Each dictionary should contain keys "Informative_Entities" (storing the Informative Entities included in the corresponding summary) and "Denser_Summary" (containing the summary).
"""
try:
response = self.llm_query(system_message, prompt)
logging.getLogger("Experiment").debug(f"COMPRESSION API RESPONSE: \n{response}")
pprocess = json.loads(response) # Process the JSON response to extract the summary and return in plain text.
try:
key = pprocess.keys()[0]
except:
key = 'summaries'
return pprocess[key][2]['Denser_Summary']
except Exception as e:
print(f"An error occurred: {e}")
# Should there be an error, return the previous context and the most recent observation instead.
print("WARNING: The LLM did not return a valid response. The response will be set to the prev context plus most recent observation.")
return prev_context + " " + observation