# 1. ALL LIBRARIES
# Model imports: new AI models are added here.
# from aimodels.speechenhancement.speechenhancement import load_model, prediction  # dev
import os
import sys
import time
import wave  # to read and write audio files
# import threading  # to manage threads
# multiprocessing is used to run the 3 worker tasks at the same time
from multiprocessing import Process, Queue, current_process
import numpy  # to handle audio data as numpy arrays
from rich.console import Console  # for colorful console output
from aimodels.speechenhancement.speechenhancement_production import (  # production
    prediction_production,
    prediction_production_data_as_narray)
import soundfile as sf  # to read/write audio files (only used by the commented-out debug dumps)
"""
2. GLOBAL CONFIGS.
"""
console = Console() # Create object to print colorful
console.print("[GLOBAL CONFIGS] Loading variables and constants...",
style="bold green")
# channels = 1 # Mono
# sample_rate = 44100 # Sample read to read and write
# periodsize = 320
# FORMAT = alsaaudio.PCM_FORMAT_S16_LE
# COUNTER_CACHE_LOOPS = 1000
# chunk = 1024 # set the chunk size of 1024 samples
# FORMAT_INPUT = pyaudio.paInt32 # default LE_32bits or pyaudio.paInt32
# FORMAT_OUTPUT = pyaudio.paFloat32 # default LE_32bits or pyaudio.paFloat32
channels = 1 # default mono
sample_rate_input = 44100 # default 44100
sample_rate_output = 8000 # default 8000 due restrictions, can't be more by 2021-03-02
record_seconds = 1.2 # default 1.1 due some restrictions
INITIAL_DELAY_SECONDS = 0.5 # initial delay in seconds to create threads
DELAY_IDLE = 0.01 # seconds to sleep until check again the queue
console.print("[GLOBAL CONFIGS] Finish loading variables and constants.",
style="bold green")
"""
3. FUNCTIONS
"""
def hello():
console.print("""|---------------------------------------------------------------------------------------------|
| ______ ______ ______ __ ________ ______ __ __ ________ _______ |
| / \ | \ / \ | \ | \ / \ | \ | \| \| \ |
|| $$$$$$\ \$$$$$$| $$$$$$\| $$ | $$$$$$$$| $$$$$$\| $$\ | $$| $$$$$$$$| $$$$$$$\ |
|| $$__| $$ | $$ | $$ \$$| $$ | $$__ | $$__| $$| $$$\| $$| $$__ | $$__| $$ |
|| $$ $$ | $$ | $$ | $$ | $$ \ | $$ $$| $$$$\ $$| $$ \ | $$ $$ |
|| $$$$$$$$ | $$ | $$ __ | $$ | $$$$$ | $$$$$$$$| $$\$$ $$| $$$$$ | $$$$$$$\ |
|| $$ | $$ _| $$_ | $$__/ \| $$_____ | $$_____ | $$ | $$| $$ \$$$$| $$_____ | $$ | $$ |
|| $$ | $$| $$ \ \$$ $$| $$ \| $$ \| $$ | $$| $$ \$$$| $$ \| $$ | $$ |
|\ $$ \$$ \$$$$$$ \$$$$$$ \$$$$$$$$ \$$$$$$$$ \$$ \$$ \$$ \$$ \$$$$$$$$ \$$ \$$ |
| |
|Maintainer: Pablo Diaz (github.com/zurmad) |
|Github: https://github.com/DZDL/aicleaner |
|License: MIT |
|Version: Alpha v0.2 [2020-03-26] |
|Operating system detected: {}
|Branch: MultiProcessing - 4 main processes (Server, Record, Process, Play) |
|---------------------------------------------------------------------------------------------|""".format(sys.platform), style="bold yellow")


def clean_temporal_files():
    """
    Remove all files in the given paths.
    """
    paths_to_remove = ['temporal/']
    console.print("[GLOBAL CONFIGS] Cleaning temporal files in {}".format(paths_to_remove),
                  style="bold green")
    try:
        for mypath in paths_to_remove:
            for f in os.listdir(mypath):
                if 'README' not in f:
                    os.remove(os.path.join(mypath, f))
    except Exception as e:
        print(e)
    console.print("[GLOBAL CONFIGS] Finished cleaning temporal files in {}".format(paths_to_remove),
                  style="bold green")


def executeprediction(aimodel='speechenhancement',
                      number=-1,
                      process_name=None,
                      output=None,
                      asnumpyarray=False,
                      mydata=None):
    """
    Dispatch the selected AI model and apply it as a filter
    to the noisy microphone input.
    """
    # Speech-Enhancement model
    if aimodel == 'speechenhancement':
        console.print("{}[AI MODEL] {} -[INFERENCE]- cleaning {}".format(process_name,
                                                                         aimodel,
                                                                         'temporal/input_{}.wav'.format(number)),
                      style="bold red")
        mydata_predicted = 0
        if not asnumpyarray:
            prediction_production(weights_path='aimodels/speechenhancement/weights',
                                  name_model='model_unet',
                                  audio_dir_prediction='temporal',
                                  dir_save_prediction='temporal/',
                                  audio_input_prediction=[
                                      'input_{}.wav'.format(number)],
                                  audio_output_prediction='output_{}.wav'.format(number),
                                  sample_rate=8000,  # default 8000
                                  min_duration=1.0,  # default 1.1
                                  frame_length=8064,  # default 8064
                                  hop_length_frame=8064,  # default 8064
                                  n_fft=255,  # default 255
                                  hop_length_fft=63,  # default 63
                                  output=output)
        else:
            mydata_predicted = prediction_production_data_as_narray(frame_length=8064,  # default 8064
                                                                    hop_length_frame=8064,  # default 8064
                                                                    n_fft=255,  # default 255
                                                                    hop_length_fft=63,  # default 63
                                                                    sample_rate=sample_rate_output,  # default 8000
                                                                    mydata=mydata,  # data as a numpy array
                                                                    output=output)
        console.print("{}[AI MODEL] Speech-Enhancement finished. Saving {}".format(process_name,
                                                                                   'temporal/output_{}.wav'.format(number)),
                      style="bold red")
        return mydata_predicted
    # New models can be added here as additional branches.
    else:
        console.print("{}[AI MODEL] No aimodel selected".format(process_name))
        return False
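
# A hypothetical sketch of how another model could be plugged into
# executeprediction() (the 'denoiser2' name and prediction_other() call below
# are placeholders, not part of this project):
#
#     elif aimodel == 'denoiser2':
#         mydata_predicted = prediction_other(mydata=mydata,
#                                             sample_rate=sample_rate_output,
#                                             output=output)
#         return mydata_predicted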


def record_only_Process(queue_data_recorded,
                        queue_data_recorded_index):
    """
    Record audio continuously, with no gaps between chunks,
    and put each chunk on a queue to be processed.
    """
    import soundcard as sc  # imported inside the process to avoid exceptions when shared across processes
    process_name = current_process().name
    start_time = time.time()
    x = 0
    numframes = int(sample_rate_input * record_seconds)
    default_mic = sc.default_microphone()
    recorder = default_mic.recorder(
        sample_rate_input, channels=channels, blocksize=256)
    with recorder as r:
        while True:
            data = r.record(numframes)  # record a chunk of record_seconds seconds
            # Threading (latency increased drastically)
            # data_queue.append(data)  # append data to the queue
            # process_queue.append(x)  # append index
            # MultiProcessing
            queue_data_recorded.put(data)
            queue_data_recorded_index.put(x)
            # sf.write('temporal/input_{}.wav'.format(x), data, sample_rate_input, 'PCM_32')
            console.print('{} RECORD: {}'.format(process_name, str(x)),
                          style="bold green")
            x += 1  # last line, don't move
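
# Note: multiprocessing.Queue pickles each recorded numpy chunk and hands it to
# the consumer process, so the recorder does not wait for inference; chunks simply
# accumulate in the queue if processing falls behind.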


def process_only_Process(queue_data_recorded,
                         queue_data_recorded_index,
                         queue_data_to_play,
                         queue_data_to_play_index):
    """
    Take recorded audio chunks, in the order they were queued by the recorder,
    and feed them to the model. It waits DELAY_IDLE between checks
    so it does not saturate the queue.
    """
    time.sleep(INITIAL_DELAY_SECONDS)
    process_name = current_process().name
    while True:
        number = queue_data_recorded_index.get()
        if number != -1:
            # pop the oldest queued chunk and keep its value
            mydata = queue_data_recorded.get()
            console.print('{} PROCESS: {}'.format(process_name,
                                                  str(number)), style="bold green")
            # with the data as a numpy array
            mydata_predicted = executeprediction(aimodel='speechenhancement',
                                                 number=number,
                                                 process_name=process_name,
                                                 output=None,
                                                 asnumpyarray=True,
                                                 mydata=mydata)
            queue_data_to_play_index.put(number)
            queue_data_to_play.put(mydata_predicted)
        else:
            time.sleep(DELAY_IDLE)  # avoid stressing the CPU
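
# Note: Queue.get() blocks by default until an item is available, so the
# DELAY_IDLE branch above only runs if a -1 sentinel is ever pushed onto the
# index queue; in normal operation the worker simply waits on get().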


def play_only_Process(queue_data_to_play,
                      queue_data_to_play_index):
    """
    Take processed audio chunks, in the order they were queued by the digestor,
    and send them to the player. It waits DELAY_IDLE between checks
    so it does not saturate the queue.
    """
    import soundcard as sc
    process_name = current_process().name
    time.sleep(INITIAL_DELAY_SECONDS * 2)
    default_speaker = sc.default_speaker()
    default_speaker_player = default_speaker.player(
        sample_rate_output, channels=channels, blocksize=256)
    with default_speaker_player as p:
        while True:
            number = queue_data_to_play_index.get()
            if number != -1:
                mydata_predicted = queue_data_to_play.get()
                console.print('{} POPPED: {}'.format(process_name, str(number)),
                              style="bold green")
                # sf.write('temporal/output_{}.wav'.format(number),
                #          mydata_predicted, sample_rate_output, 'PCM_32')
                p.play(mydata_predicted)
            else:
                # p.play(np.asarray([0]))
                time.sleep(DELAY_IDLE)  # avoid stressing the CPU
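
# Rough latency note (derived from the constants above): in steady state the
# cleaned audio lags the microphone by at least one chunk (record_seconds = 1.2 s)
# plus the model's inference time; the INITIAL_DELAY_SECONDS sleeps only delay start-up.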


def power_on_tensorflow_serving_Process():
    """
    Start the TensorFlow Serving server.
    - The apt package is needed on the server/inference side.
    - The pip package is needed for development.
    """
    api_port = 8501
    model_name = 'model_unet'
    model_base_path = os.path.dirname(os.path.abspath('main.py'))
    model_relative_path = model_base_path + '/aimodels/speechenhancement/serving/'
    command = 'tensorflow_model_server --rest_api_port={} --model_name={} --model_base_path="{}"'.format(api_port,
                                                                                                          model_name,
                                                                                                          model_relative_path)
    process_name = current_process().name
    console.print('{} TENSORFLOW SERVER TURNING ON...'.format(process_name),
                  style="bold green")
    # Turn on the server
    print(command)
    os.popen(command)
    console.print('{} TENSORFLOW SERVER READY.'.format(process_name),
                  style="bold green")
"""
4. LOOP ITERATION TO PROCESS DIRTY TO CLEAN AUDIO
"""
if __name__ == '__main__':
# Initial configurations
hello() # Logo and info of the project
clean_temporal_files() # Clean files by path
queue_data_recorded_index = Queue()
queue_data_recorded = Queue()
queue_data_to_play_index = Queue()
queue_data_to_play = Queue()
time.sleep(INITIAL_DELAY_SECONDS) # drivers slow start needed
# Turn on server of tensorflow serving
P0_SERVER_PROCESS = Process(target=power_on_tensorflow_serving_Process,
name='[P0][SERVER]')
P0_SERVER_PROCESS.start()
P0_SERVER_PROCESS.join()
time.sleep(INITIAL_DELAY_SECONDS) # drivers slow start needed
# Record voice continously
P1_RECORDER_PROCESS = Process(target=record_only_Process,
args=((queue_data_recorded),
(queue_data_recorded_index)),
name='[P1][RECORDER]')
# Process as queued
P2_PROCESSMODEL_PROCESS = Process(target=process_only_Process,
args=((queue_data_recorded),
(queue_data_recorded_index),
(queue_data_to_play),
(queue_data_to_play_index)),
name='[P2][DIGESTOR]')
# Play as finished processed
P3_PLAYER_PROCESS = Process(target=play_only_Process,
args=((queue_data_to_play),
(queue_data_to_play_index)),
name='[P3][PLAYER]')
# Charge and join threads
P1_RECORDER_PROCESS.start()
P2_PROCESSMODEL_PROCESS.start()
P3_PLAYER_PROCESS.start()
P1_RECORDER_PROCESS.join()
P2_PROCESSMODEL_PROCESS.join()
P3_PLAYER_PROCESS.join()
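
# Note: the recorder, digestor and player loop forever, so the join() calls above
# block indefinitely and the script runs until it is interrupted (e.g. Ctrl+C).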