helper.py
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel, pipeline, AutoModelForSequenceClassification
from FlagEmbedding import BGEM3FlagModel
'''
Embedding and sentiment-analysis helpers.
hybrid_lorebook_pulling retrieves documents by vector similarity
as well as activation-word matching.
Referenced from memory.py for supa/hypa/hanurai memory.
'''
# Find documents whose text contains any activation word (case-insensitive)
def filter_docs_by_words(lorebook, activation_words):
    word_based_docs = {}
    for doc in lorebook:
        # activation_words is a list of word groups; any hit from any group counts
        if any(any(word.lower() in doc.lower() for word in sublist) for sublist in activation_words):
            word_based_docs[doc] = "Activation Word Match"
    return word_based_docs
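# Usage sketch (hypothetical data): activation_words is a list of word
# groups; a document matches if it contains any word from any group.
#
#   lorebook = ["Eldoria is a city of mages.", "The Iron Pact rules the north."]
#   activation_words = [["eldoria", "wizard"], ["iron pact"]]
#   filter_docs_by_words(lorebook, activation_words)
#   # -> both documents map to "Activation Word Match"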
# Format chat history into a single "role: content" query string for embedding models
def format_chat_history(chat_history):
    return '\n'.join([f"{turn['role']}: {turn['content']}" for turn in chat_history]).strip()
# Same as above, but label user turns with user_name instead of "user"
def format_user_chat_history(chat_history, user_name):
    return '\n'.join([f"{user_name if turn['role'] == 'user' else turn['role']}: {turn['content']}" for turn in chat_history]).strip()
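# Usage sketch (hypothetical turns): both formatters flatten the history
# into "role: content" lines; the second swaps a display name in for "user".
#
#   history = [{"role": "user", "content": "Hi"},
#              {"role": "assistant", "content": "Hello!"}]
#   format_chat_history(history)                # "user: Hi\nassistant: Hello!"
#   format_user_chat_history(history, "Alice")  # "Alice: Hi\nassistant: Hello!"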
# Pull relevant documents from the lorebook using both the chat-history
# embedding (Dragon multiturn encoders) and activation-word matching
def hybrid_lorebook_pulling(chat_history=[], lorebook=[], activation_words=[], prob_threshold=0.2):
    try:
        # Initialize the tokenizer and the Dragon multiturn query/context encoders
        tokenizer = AutoTokenizer.from_pretrained('nvidia/dragon-multiturn-query-encoder')
        query_encoder = AutoModel.from_pretrained('nvidia/dragon-multiturn-query-encoder')
        context_encoder = AutoModel.from_pretrained('nvidia/dragon-multiturn-context-encoder')
        # Format the chat history into a query and embed it (CLS token)
        formatted_query = format_chat_history(chat_history)
        query_input = tokenizer(formatted_query, return_tensors='pt')
        query_emb = query_encoder(**query_input).last_hidden_state[:, 0, :]
        # Embed every lorebook document as a context
        ctx_input = tokenizer(lorebook, padding=True, truncation=True, max_length=512, return_tensors='pt')
        ctx_emb = context_encoder(**ctx_input).last_hidden_state[:, 0, :]
        # Dot-product similarities, softmax-normalized over the lorebook;
        # keep documents whose probability clears prob_threshold
        similarities = query_emb.matmul(ctx_emb.transpose(0, 1))
        softmax_values = F.softmax(similarities, dim=-1).squeeze()
        relevant_docs_indices = (softmax_values > prob_threshold).nonzero(as_tuple=True)[0].tolist()
        relevant_docs = {lorebook[i]: softmax_values[i].item() for i in relevant_docs_indices}
        # Find documents triggered by activation words
        word_based_docs = filter_docs_by_words(lorebook, activation_words)
        # Merge both result sets (similarity scores win on key collisions)
        merged_docs = {**word_based_docs, **relevant_docs}
        result_list = list(merged_docs.keys())
        return '\n'.join(result_list) if result_list else "No additional information"
    except IndexError:
        # Can occur when squeeze() yields a 0-dim tensor (e.g. a single-document lorebook)
        return "No additional information"
    except Exception as e:
        print(f"Unexpected error: {e}")
        return "No additional information"
# Embed the chat history with BGE-M3 and return the dense vector
# (minimal completion of the original stub)
def embed_chat_history_bgem3(chat_history):
    model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)
    # encode() returns a dict; 'dense_vecs' holds the dense embedding
    return model.encode(format_chat_history(chat_history))['dense_vecs']
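# Usage sketch (hypothetical history): with BGE-M3's default encode()
# settings the dense embedding is a 1024-dimensional vector.
#
#   vec = embed_chat_history_bgem3([{"role": "user", "content": "Hi"}])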
def embed_chat_history_dragon(chat_history):
    tokenizer = AutoTokenizer.from_pretrained('nvidia/dragon-multiturn-query-encoder')
    query_encoder = AutoModel.from_pretrained('nvidia/dragon-multiturn-query-encoder')
    # Format the chat history (this becomes the query); user_name formatting is not used here
    formatted_query = format_chat_history(chat_history)
    query_input = tokenizer(formatted_query, return_tensors='pt')
    query_emb = query_encoder(**query_input).last_hidden_state[:, 0, :]
    return query_emb
def embed_context_dragon(context, user_name="user"):  # either raw chat_history or a summarized text
    tokenizer = AutoTokenizer.from_pretrained('nvidia/dragon-multiturn-query-encoder')
    context_encoder = AutoModel.from_pretrained('nvidia/dragon-multiturn-context-encoder')
    # If given raw chat-history dicts, flatten them to "name: content" strings first
    if isinstance(context[-1], dict):
        context = [f"{user_name if turn['role'] == 'user' else turn['role']}: {turn['content']}" for turn in context]
    ctx_input = tokenizer(context, padding=True, truncation=True, max_length=512, return_tensors='pt')
    ctx_emb = context_encoder(**ctx_input).last_hidden_state[:, 0, :]
    return ctx_emb
def similarity_retrieve(query_emb, context_emb, context=[], prob_threshold=0.2):
    # Dot-product similarities, softmax-normalized over the contexts
    similarities = query_emb.matmul(context_emb.transpose(0, 1))
    softmax_values = F.softmax(similarities, dim=-1).squeeze()
    # Keep contexts whose probability clears the threshold
    relevant_docs_indices = (softmax_values > prob_threshold).nonzero(as_tuple=True)[0].tolist()
    relevant_docs = {context[i]: softmax_values[i].item() for i in relevant_docs_indices}
    return relevant_docs
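# Usage sketch (hypothetical data): the three Dragon helpers above compose
# into the same retrieval pass as hybrid_lorebook_pulling's embedding arm.
#
#   history = [{"role": "user", "content": "Who rules the north?"}]
#   docs = ["Eldoria is a city of mages.", "The Iron Pact rules the north."]
#   q_emb = embed_chat_history_dragon(history)
#   c_emb = embed_context_dragon(docs)
#   similarity_retrieve(q_emb, c_emb, context=docs, prob_threshold=0.2)
#   # -> {doc: softmax probability, ...} for docs above the threshold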
def max_token_retrieve():
    # TODO: retrieve chat until max context length is reached
    return
# Function to summarize chat history
def summarize_history(user_name, chat_history, model="Huzaifa367/chat-summarizer"):
    formatted_query = format_user_chat_history(chat_history, user_name)
    chatsum = pipeline("summarization", model=model)
    # gotta use OpenAI or other API embedding someday :P
    # Need to use classes at that point
    return chatsum(formatted_query)
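# Usage sketch (hypothetical turns): the summarization pipeline returns a
# list of dicts keyed by "summary_text".
#
#   history = [{"role": "user", "content": "Let's plan the trip."},
#              {"role": "assistant", "content": "Sure, where to?"}]
#   summarize_history("Alice", history)
#   # -> [{"summary_text": "..."}]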
# Function to pull emotion from text
def emotion_pull(text):
    model = AutoModelForSequenceClassification.from_pretrained('jitesh/emotion-english')
    classifier = pipeline("text-classification", model=model, tokenizer='jitesh/emotion-english')
    prediction = classifier(text)
    # Return only the top predicted emotion label
    return prediction[0]['label']
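# Usage sketch: the jitesh/emotion-english classifier returns an emotion
# label such as "joy" or "anger"; only the top label is surfaced here.
#
#   emotion_pull("I can't believe we won!")  # e.g. "joy"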