-
Notifications
You must be signed in to change notification settings - Fork 0
/
server.py
225 lines (190 loc) · 10.1 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
import asyncio
import base64
import json
import os
import re
import ssl
import time

import requests
import wget
from colorama import Fore
#for gpt and whisper STT
from openai import OpenAI
#audio processing
from pydub import AudioSegment, effects
import soundfile as sf
from pedalboard import Pedalboard, Chorus, Reverb, PitchShift, Delay
#TTS obviously
from elevenlabs import set_api_key
from elevenlabs import generate, save, stream
from gtts import gTTS
#browser shit
from flask import request, session, Flask, send_file
from flask import render_template
from flask_socketio import SocketIO, Namespace, disconnect
from flask_cors import CORS
from werkzeug.exceptions import HTTPException
##
# Danomation
# GitHub: https://github.com/danomation
# Patreon https://www.patreon.com/Wintermute310
##
##
# API KEYS
# Keys are read from the environment so they do not have to be committed to
# the source file. When a variable is unset the value is the empty string,
# exactly as before; replace the default below to hard-code a key instead.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))
elevenlabs_api_key = os.environ.get("ELEVEN_API_KEY", "")
set_api_key(elevenlabs_api_key)
##
###
# APP VARIABLES
#
# Which text-to-speech backend sendtts() uses: "elevenlabs" selects the
# ElevenLabs branch, any other value (e.g. "google") selects the free gTTS one.
tts_provider = "google"
# Directory for uploads, generated replies and chat.txt. Several paths are
# built by plain string concatenation, so keep the trailing slash.
recordings_dir = "path/to/your/recordings/dir/"
# Certificate chain and private key loaded into the server's TLS context.
ssl_cert = "/path/to/your/fullchain.pem"
ssl_key = "/path/to/your/privkey.pem"
##
# Flask application with the Socket.IO server layered on top of it.
# NOTE(review): both Socket.IO and CORS accept every origin ("*"); confirm
# that this is intended before exposing the server publicly.
app = Flask("gptvoicewebsite")
socketio = SocketIO(app, cors_allowed_origins="*")
CORS(app, resources={r"/*": {"origins": "*"}})

# Server-side TLS context, passed to socketio.run() at the bottom of the file.
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
ssl_context.check_hostname = False
ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)

# Make sure the recordings directory exists before anything is written to it.
os.makedirs(recordings_dir, exist_ok=True)

# Short silent clip that is sent back instead of a spoken reply when a
# transcript is discarded as a dubious whisper hallucination. It is stored as
# "silent.mp3" because that is the name the upload handler reads. The "/raw/"
# URL serves the audio bytes; the "/blob/" URL is an HTML page.
mp3url = 'https://github.com/anars/blank-audio/raw/master/750-milliseconds-of-silence.mp3'
path_to_output_file = recordings_dir + "silent.mp3"
# Download once; later starts reuse the file already on disk.
if not os.path.exists(path_to_output_file):
    wget.download(mp3url, path_to_output_file)
@app.errorhandler(Exception)
def handle_exception(e):
    """Turn an exception raised during a request into an HTTP response.

    Args:
        e: The exception Flask caught while handling the request.

    Returns:
        ``e`` itself when it is an ``HTTPException`` (it already is a valid
        response), otherwise a ``(body, 500)`` tuple rendering the
        ``500_generic.html`` template with ``e`` in its context.
    """
    # HTTP errors (404, 405, ...) pass through unchanged.
    if isinstance(e, HTTPException):
        return e
    # Because this handler is registered for Exception, nothing else will
    # record the failure; log it with its traceback before answering.
    app.logger.error("Unhandled exception while handling request", exc_info=e)
    return render_template("500_generic.html", e=e), 500
def sendgpt(message, session_history):
    """Request a chat completion for a conversation and record the answer.

    Args:
        message: Latest user text. Not used here; the request is built from
            ``session_history`` alone.
        session_history: List of ``{"role": ..., "content": ...}`` dicts sent
            as the conversation. The assistant's answer is appended to this
            same list in place.

    Returns:
        A ``(reply, session_history)`` tuple: the answer as a string and the
        list that was passed in, now ending with the assistant entry.
    """
    completion = client.chat.completions.create(
        user="web",
        max_tokens=1024,
        temperature=1.2,
        messages=session_history,
        model="gpt-4o",
    )
    answer = completion.choices[0].message.content
    session_history.append({"role": "assistant", "content": answer})
    return str(answer), session_history
def sendtts(message):
    """Synthesize ``message`` to an audio file and return the file's path.

    With ``tts_provider == "elevenlabs"`` the text is rendered by ElevenLabs
    and saved as an mp3. Otherwise gTTS produces an mp3 which is then sped up,
    run through a Pedalboard effect chain and written out as a wav.

    Args:
        message: The text to speak.

    Returns:
        Path of the generated file inside ``recordings_dir``: ``.mp3`` for the
        ElevenLabs branch, ``.wav`` for the gTTS branch.
    """
    time_stamp = str(time.time())
    file_path = recordings_dir + "reply_" + time_stamp + ".mp3"

    if tts_provider == "elevenlabs":
        audio = generate(
            text=message,
            voice="Rachel",
            #voice="vKECufy6OSQM8LSmvMEi", #my voice selection private to me - wintermute
            model="eleven_turbo_v2",
            stream=False,
        )
        save(audio, file_path)
        return file_path

    # Free gTTS voice, with some style added to it.
    tts = gTTS(message, tld="us")
    tts.save(file_path)

    # Load the mp3 into pydub and reinterpret the same samples at 1.5x the
    # frame rate, which makes playback faster and higher pitched.
    sound = AudioSegment.from_mp3(file_path)
    wav_path = recordings_dir + "reply_" + time_stamp + ".wav"
    sound = sound._spawn(sound.raw_data, overrides={
        "frame_rate": int(sound.frame_rate * 1.5)
    })
    sound.export(wav_path, format="wav")

    # Read the wav back and run it through the Pedalboard effect chain.
    audio, sample_rate = sf.read(wav_path)
    board = Pedalboard([Chorus(), Reverb(room_size=0.35), PitchShift(semitones=-11)])
    effected = board(audio, sample_rate)

    # The processed audio overwrites the intermediate wav.
    sf.write(wav_path, effected, sample_rate)
    return wav_path
# Module-level registry kept for backward compatibility; AudioNamespace tracks
# its sessions in its own ``self.user_sessions`` dict.
user_sessions = {}


class AudioNamespace(Namespace):
    """Socket.IO namespace that answers uploaded speech with spoken replies.

    For each upload the audio is saved, transcribed with Whisper, filtered for
    known hallucinations, sent to GPT through ``sendgpt`` and the answer is
    voiced through ``sendtts`` and emitted back to the uploading client as an
    ``audio`` event.
    """

    # Any character outside 7-bit ASCII marks the transcript as dubious.
    _NON_ASCII = re.compile(r'[^\x00-\x7F]+')

    # Transcripts treated as whisper hallucinations: they are answered with
    # silence instead of being sent to GPT.
    _HALLUCINATIONS = frozenset({
        "Please click Subscribe and LIKE. It's a big help to me.",
        "If you have any questions, please leave a comment.",
        "Thank you for your time.",
        "If you find the video useful, please like, share the video, and subscribe. Thanks for watching it.",
        "If you have any questions, please post them in the comments.",
        "Please subscribe to my channel.",
        "Thank you for watching the video.",
        "If you have any questions or comments, please post them in the comments section.",
        "If you find the video useful, please like, share the video, and subscribe.",
        "",
        "If you find the video useful, please like, share the video, and subscribe. Thanks for watching.",
        "If you have any questions or other problems, please post them in the comments.",
        "チャンネル登録をお願いいたします",
        "먹방끝 빠이빠이",
        "Bye for now.",
        "Thanks for watching!",
        ". .",
        "Дякую за перегляд!",
        "to to to",
        "Дякуємо за перегляд і до зустрічі у наступному відео!",
        "Thank you for watching.",
        "Peace.",
        "MBC 뉴스 이덕영입니다.",
        "Oh",
        "You",
        "you",
        "oh",
    })

    def __init__(self, namespace=None):
        """Create the namespace with an empty per-connection session table."""
        super().__init__(namespace)
        # Maps a connection's sid to {'history': [chat messages]}.
        self.user_sessions = {}

    def on_connect(self):
        """Register the new connection and seed its history with the system prompt."""
        session['sid'] = request.sid
        print('-- Session ' + str(session['sid']) + ' connected to /audio')
        self.user_sessions[session['sid']] = {'history': [{"role": "system","content": "Act as a cyberpunk robot named Wintermute. Reply Limit is 35 words. Don't use urls, Hashtags, or emojis"},]}

    def on_disconnect(self):
        """Log the disconnect and drop the connection's stored history."""
        print('-- Session ' + str(session['sid']) + ' disconnected from /audio')
        print('A user disconnected from /audio')
        # Without this the table would keep one entry per connection forever.
        self.user_sessions.pop(request.sid, None)

    def on_audio(self, data):
        """Handle an ``audio`` event from the client.

        Currently only looks up the sender's session; ``data`` is not used.
        """
        current_session = self.user_sessions[request.sid]

    def on_upload_audio(self, audioData):
        """Transcribe an uploaded recording and emit a spoken reply.

        Args:
            audioData: Base64 audio, normally a data URL of the form
                ``<header>,<base64>``; a bare base64 string is accepted too.
        """
        sid = session['sid']
        print('-- Received uploaded audio file for session ' + str(sid))

        # Strip the data-URL header when there is one.
        parts = audioData.split(",")
        audioBuffer = base64.b64decode(parts[1] if len(parts) > 1 else parts[0])

        # Write the audio data to a new .ogg file in the recordings directory.
        outputfile = f"{int(time.time())}_audio.ogg"
        filePath = os.path.join(recordings_dir, outputfile)
        with open(filePath, "wb") as audioFile:
            audioFile.write(audioBuffer)
        print('-- File saved for session ' + str(sid))

        with open(filePath, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
        print(transcript)
        transcript = str(transcript.text)

        # Throw out dubious recordings: anything containing non-ASCII
        # characters or matching a known hallucination gets silence back.
        if self._NON_ASCII.search(transcript) or transcript in self._HALLUCINATIONS:
            self._send_audio_file(recordings_dir + "silent.mp3", 'audio/mp3')
            return

        # Output this user's transcript to the console and the chat log.
        print(str(sid) + Fore.YELLOW + " user: " + Fore.WHITE + transcript)
        self._append_chat_log(str(sid[:4]) + "@switchmeme: " + transcript + "\n")

        current_session = self.user_sessions[request.sid]
        history = current_session['history']
        history.append({"role": "user","content": transcript})
        # Send the system prompt plus the ten most recent other messages.
        # Slicing from index 1 keeps the prompt from being included twice
        # while the conversation is still short.
        selected_history = history[:1] + history[1:][-10:]
        # sendgpt appends the answer to the list it is given and returns it,
        # so the stored history becomes this window plus the new reply.
        response, current_session['history'] = sendgpt(transcript, selected_history)

        # Output wintermute's response to the console and the chat log.
        print(str(sid) + Fore.GREEN + " Wintermute: " + Fore.WHITE + response)
        self._append_chat_log("wintermute: " + response + "\n\n")

        response_audio_path = sendtts(response)
        # sendtts returns an mp3 for ElevenLabs and a wav for gTTS; label the
        # payload to match the file actually produced.
        mime_type = 'audio/mp3' if response_audio_path.endswith(".mp3") else 'audio/wav'
        self._send_audio_file(response_audio_path, mime_type)

    def _send_audio_file(self, path, mime_type):
        """Read the file at ``path`` and emit it to the current client as ``audio``."""
        with open(path, "rb") as audioToSend:
            audioDataSend = audioToSend.read()
        dataToSend = {'type': mime_type,
                      'data': audioDataSend  # This is the audio blob data to send
                      }
        self.emit('audio', dataToSend, room=session['sid'])

    @staticmethod
    def _append_chat_log(text):
        """Append ``text`` to ``chat.txt`` in the recordings directory."""
        # UTF-8 so a reply containing non-ASCII characters cannot fail to
        # encode under a narrower platform default.
        with open(recordings_dir + "chat.txt", "a", encoding="utf-8") as chat_log:
            chat_log.write(text)
# Attach the /audio event handlers to the Socket.IO server.
socketio.on_namespace(
    AudioNamespace('/audio')
)

# Start serving on every interface, port 5000, using the TLS context
# configured earlier in the file.
socketio.run(
    app,
    host='0.0.0.0',
    port=5000,
    ssl_context=ssl_context,
    allow_unsafe_werkzeug=True,
)