chatbot.py
import gradio as gr
import os
import time
from operator import itemgetter

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline

from langchain.chains import LLMChain
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
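# ---------------------------------------------------------------------------
# chatbot.py: a retrieval-augmented generation (RAG) chatbot over Debayan
# Bhattacharya's research papers. It loads a 4-bit quantized
# meta-llama/Meta-Llama-3-8B-Instruct model, retrieves relevant paper chunks
# from a local FAISS vector store, and serves a Gradio chat interface.
# ---------------------------------------------------------------------------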
#################################################################
# Tokenizer
#################################################################
custom_cache_dir = "/data/Bhattacharya/.cache"
# Create the cache directory if it doesn't exist
os.makedirs(custom_cache_dir, exist_ok=True)

# model_name = 'mistralai/Mistral-7B-Instruct-v0.1'
model_name = 'meta-llama/Meta-Llama-3-8B-Instruct'
model_config = transformers.AutoConfig.from_pretrained(
    model_name,
    cache_dir=custom_cache_dir,
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    cache_dir=custom_cache_dir,
)
# Llama tokenizers ship without a pad token; reuse EOS so padded batches work.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# Quick sanity check of the tokenizer
print(tokenizer("Hello, this is a test sentence."), "AutoTokenizer")

save_directory = "/data/Bhattacharya/"
model_config.save_pretrained(save_directory)
#################################################################
# bitsandbytes parameters
#################################################################
# Activate 4-bit precision base model loading
use_4bit = True
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False
#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)
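# With 4-bit NF4 weights, the 8B-parameter model needs roughly 5-6 GB of GPU
# memory (weights plus activation/KV-cache overhead), versus ~16 GB in fp16.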
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)
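# (Informational only: the check above just prints a hint; the compute dtype
# remains float16 unless bnb_4bit_compute_dtype is changed above.)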
#################################################################
# Load pre-trained model (4-bit quantized)
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    cache_dir=custom_cache_dir,
)
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"
    )

print(print_number_of_trainable_model_parameters(model))
text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.1,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=3000,
)
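# Note: temperature only takes effect when sampling is enabled (do_sample=True,
# either passed here or inherited from the model's generation config); with
# greedy decoding it is ignored.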
# Wrap the HF pipeline so LangChain chains can call it as an LLM
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
# Load the prebuilt FAISS vector store of paper embeddings from local disk
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
db = FAISS.load_local(
    "/home/Bhattacharya/RAG_with_Debayan/documents/vectorstore/faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)
retriever = db.as_retriever(search_kwargs={"k": 5})
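# The FAISS index above is assumed to have been built offline, roughly along
# these lines (a minimal sketch; the loader, chunk sizes, and source path are
# illustrative assumptions, not the exact pipeline used to build the index):
#
#   from langchain.document_loaders import DirectoryLoader
#   from langchain.text_splitter import CharacterTextSplitter
#
#   docs = DirectoryLoader("/path/to/papers").load()
#   chunks = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)
#   index = FAISS.from_documents(chunks, embeddings)
#   index.save_local("/home/Bhattacharya/RAG_with_Debayan/documents/vectorstore/faiss_index")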
"""
# Optional (disabled): rewrite the user question with the LLM and retrieve with
# MultiQueryRetriever instead of plain top-k similarity search.
standalone_query_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.0,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=10000,
)
standalone_query_generation_llm = HuggingFacePipeline(pipeline=standalone_query_generation_pipeline)

from langchain.retrievers.multi_query import MultiQueryRetriever
retriever = MultiQueryRetriever.from_llm(retriever=db.as_retriever(), llm=standalone_query_generation_llm)
"""
# Create the prompt template.
# Note: the [INST] ... [/INST] markers follow the Mistral/Llama-2 instruction
# format rather than Llama-3's chat template; they are kept because the chatbot
# later splits the generated text on "[/INST]" to strip the echoed prompt.
prompt_template = """
### [INST] Instruction: Answer the question based on the provided context and chat history. If no chat history is provided, answer strictly from the context. If both chat history and context are provided, use both. Here is the chat history to help:
{chat_history}
Here is the context to help:
{context}
### QUESTION:
{question} [/INST]
"""
# Create prompt from the prompt template
prompt = PromptTemplate(
    input_variables=["chat_history", "context", "question"],
    template=prompt_template,
)

# Create LLM chain
llm_chain = LLMChain(llm=llm, prompt=prompt)
rag_chain = (
    {
        "chat_history": itemgetter("chat_history"),
        # The retriever gets only the question text as its search query
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | llm_chain
)
# Example:
# response = rag_chain.invoke({"chat_history": "", "question": "Explain the main idea of the paper \"Learning Robust Representation for Laryngeal Cancer Classification in Vocal Folds from Narrow Band Images\""})
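# Data flow: the input dict {"chat_history", "question"} is fanned out in
# parallel; the retriever maps the question to the top-k paper chunks, and the
# LLMChain fills the prompt template and runs it through the Llama-3 pipeline.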
with gr.Blocks() as demo:
    textbox = gr.Textbox(lines=5, label="Welcome to DebayanGPT", value="I am an AI chatbot created by my master Debayan Bhattacharya. I am here to help you with your queries related to my master's research papers. \n You can ask me questions like: \n \n 'What is the main idea of the paper \"Learning Robust Representation for Laryngeal Cancer Classification in Vocal Folds from Narrow Band Images\"?' \n \n 'What is the main novelty of the paper \"Squeeze and multi-context attention for polyp segmentation\"?' \n \n 'What were the main findings of the paper \"Computer-Aided Diagnosis of Maxillary Sinus Anomalies: Validation and Clinical Correlation\"?' \n \n Please note that I am a work in progress and I may not be able to answer all your questions. \n Please be patient with me. \n Thank you for your understanding. \n How can I help you today?")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Enter your question here and press Shift+ENTER", lines=2, placeholder="What is the main idea of the paper \"Learning Robust Representation for Laryngeal Cancer Classification in Vocal Folds from Narrow Band Images\"?")
    clear = gr.ClearButton([msg, chatbot])
    def respond(message, chat_history):
        # Flatten the (user, bot) message pairs into one chat-history string
        chat_history_text = ""
        for user_msg, bot_msg in chat_history:
            chat_history_text += "User: " + user_msg + "\n" + "Chatbot: " + bot_msg + "\n"
        data = {"chat_history": chat_history_text, "question": message}
        bot_message = rag_chain.invoke(data)
        # Keep only the generated answer (the text after the echoed prompt)
        bot_message = bot_message["text"].split("[/INST]")[-1]
        print("Chat history: ", chat_history_text)
        print("Bot message: ", bot_message)
        chat_history.append((message, bot_message))
        time.sleep(2)
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
demo.queue()
demo.launch(share=True)
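# Note: share=True additionally exposes the app on a temporary public
# *.gradio.live URL; remove it to serve on the local address only.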