-
Notifications
You must be signed in to change notification settings - Fork 0
/
WS_classify.py
175 lines (149 loc) · 6.07 KB
/
WS_classify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import sys
import pyaudio
import argparse
import numpy as np
import essentia
import atexit
import threading as th
import time
import core.WS_utils
from core.WS_network import *
class Classifier:
    """Thin wrapper that owns a NeuralNetwork built from the configured weights.

    The network is exposed as ``self.network`` so callers can invoke its
    ``test_on_file`` / ``test_on_dir`` methods directly.
    """

    def __init__(self):
        """Build the network from ``"core/" + WS_global_data.weights_path``."""
        weights_path = "core/" + WS_global_data.weights_path
        self.network = NeuralNetwork(weights_file=weights_path)

    def classify_file(self, filepath):
        """Run the network on a single audio file and return its result.

        Args:
            filepath: Path of the audio file handed to the network.

        Returns:
            Whatever ``self.network.test_on_file`` returns for the file.
        """
        # The original signature omitted ``self``, so every bound call failed
        # with a TypeError, and the network's result was thrown away.
        # NOTE(review): the meaning of the positional ``2`` is not visible in
        # this file -- confirm against NeuralNetwork.test_on_file.
        return self.network.test_on_file(filepath, 2)
class StreamClassifier(Classifier):
    """Classifier that also owns a PyAudio input stream.

    Raw chunks read from the stream are kept in ``self.frames`` and the
    decoded 32-bit integer samples are accumulated in ``self.buffer``.
    The stream is opened without a callback, so audio is pulled with
    blocking reads in ``record``.
    """

    def __init__(self, buffer_size = WS_global_data.buffer_size,
                 channels = WS_global_data.channels,
                 sample_rate = WS_global_data.sample_rate):
        """Load the network and open the input stream.

        Args:
            buffer_size: Frames per PyAudio buffer and per blocking read.
            channels: Channel count passed to ``PyAudio.open``.
            sample_rate: Sample rate passed to ``PyAudio.open``.
        """
        Classifier.__init__(self)
        self.buffer_size = buffer_size
        self.sample_rate = sample_rate
        self.channels = channels
        self.format = pyaudio.paInt32
        # get_buffer() acquires this lock; it used to be commented out, which
        # made get_buffer() fail with an AttributeError.
        self.lock = th.Lock()
        self._closed = False
        self.paudio = pyaudio.PyAudio()
        try:
            self.stream = self.paudio.open(format=self.format,
                                           channels=self.channels,
                                           rate=self.sample_rate,
                                           input=True,
                                           frames_per_buffer=self.buffer_size)
        except Exception:
            # Do not leak the PyAudio handle when the device cannot be opened.
            self.paudio.terminate()
            raise
        self.frames = []
        self.buffer = np.array([])
        # Release the stream and the PyAudio handle at interpreter exit
        # (previously the stream was only stopped, never closed).
        atexit.register(self.close)

    def __stream_callback(self, in_data, frame_count, time_info, status):
        """PyAudio-style callback that appends one chunk to the buffers.

        It is not passed to ``PyAudio.open`` in ``__init__``; it is kept for
        callers that want callback-driven capture.
        """
        # Decode the raw bytes as 32-bit integer samples.
        samples = np.frombuffer(in_data, dtype=np.int32)
        # Same lock as get_buffer(), so a drain cannot interleave with an append.
        with self.lock:
            self.frames.append(in_data)
            self.buffer = np.hstack((self.buffer, samples))
        return None, pyaudio.paContinue

    def record(self, seconds):
        """Blocking-read roughly ``seconds`` of audio from the stream.

        Args:
            seconds: Recording length; converted to a whole number of
                ``buffer_size``-frame reads.

        Returns:
            ``(buffer, frames)``: the concatenated samples as a numpy array
            and the list of raw chunks returned by the stream.
        """
        self.clear()
        # Scale before dividing: under Python 2, rate / buffer_size on ints
        # truncated first and under-counted the number of chunks.
        chunk_count = int(self.sample_rate * seconds / float(self.buffer_size))
        # Start from the (empty) buffer so the result keeps its float dtype,
        # then join once instead of re-copying the array on every chunk.
        pieces = [self.buffer]
        for _ in range(chunk_count):
            frame_raw = self.stream.read(self.buffer_size)
            self.frames.append(frame_raw)
            pieces.append(np.frombuffer(frame_raw, dtype=np.int32))
        self.buffer = np.concatenate(pieces)
        return (self.buffer, self.frames)

    def clear(self):
        """Discard everything accumulated so far."""
        self.buffer = np.array([])
        self.frames = []

    def start_listening(self):
        """Start the underlying PyAudio stream."""
        self.stream.start_stream()

    def stop_listening(self):
        """Stop the underlying PyAudio stream without closing it."""
        self.stream.stop_stream()

    def close(self):
        """Stop and close the stream and terminate PyAudio.

        Safe to call more than once; only the first call does any work.
        """
        if self._closed:
            return
        self._closed = True
        self.stop_listening()
        self.stream.close()
        self.paudio.terminate()

    def get_buffer(self):
        """Return ``(buffer, frames)`` accumulated so far and reset both."""
        with self.lock:
            result_buffer = self.buffer
            result_raw = self.frames
            self.buffer = np.array([])
            self.frames = []
            return (result_buffer, result_raw)
# Settings used by record_audio() below.
# Frames per stream.read() call; also passed as frames_per_buffer.
CHUNK = 1024
# Sample format passed to PyAudio.open (32-bit float samples).
FORMAT = pyaudio.paFloat32
# Channel count passed to PyAudio.open.
CHANNELS = 1
# Sample rate passed to PyAudio.open.
RATE = 44100
# Combined with RATE and CHUNK to decide how many chunks record_audio() reads.
RECORD_SECONDS = 3
# NOTE(review): SIZE is not referenced anywhere in the visible part of this
# file -- confirm whether it is still needed.
SIZE = 16
def record_audio():
    """Record a short clip from the default input and return its samples.

    Reads ``RECORD_SECONDS`` worth of ``CHUNK``-frame buffers from a PyAudio
    input stream, writes the raw chunks to ``recording.wav`` through
    ``WS_utils.write_audio_signal`` and returns the decoded samples.

    Returns:
        A 1-D numpy array holding every recorded sample, decoded as
        32-bit floats and concatenated in recording order.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording (%d seconds)" % RECORD_SECONDS)
            frames = []
            # Scale before dividing so Python 2 integer division does not
            # truncate RATE / CHUNK ahead of the multiplication.
            for _ in range(int(RATE * RECORD_SECONDS / float(CHUNK))):
                frames.append(stream.read(CHUNK))
            print("* done")
            stream.stop_stream()
        finally:
            # Always release the stream, even when open/read fails part-way.
            stream.close()
    finally:
        p.terminate()

    WS_utils.write_audio_signal(frames,'recording.wav')

    # Leading empty float array keeps the result dtype identical to the old
    # incremental hstack, while joining all chunks in a single copy.
    pieces = [np.array([])]
    pieces.extend(np.frombuffer(data, dtype=np.float32) for data in frames)
    return np.concatenate(pieces)
if __name__ == "__main__":
    # Command-line entry point: classify one file (-f), a directory (-d) or
    # a live microphone stream (-r). -t optionally names the target class.
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--file", help="The path of the file to classify")
    parser.add_argument("-d", "--dir", help="The directory to classify")
    parser.add_argument("-t", "--target", help="The target audio class")
    parser.add_argument("-r", "--realtime", help="Enable realtime classification",
                        action="store_true")
    args = parser.parse_args()

    if not (args.file or args.realtime or args.dir):
        print("Incorrect usage ")
        parser.print_help()
        # A usage error must not report success; 2 is the status argparse
        # itself uses for command-line errors.
        sys.exit(2)

    print("\n\n. . . . . . . . . . . WhatSound classifier . . . . . . . . . . . \n")

    # The file branch is independent of the directory/realtime branches, so
    # -f may be combined with -d or -r.
    if args.file:
        print("Classifier mode: [file]")
        filepath = args.file
        print("Filename: " + filepath)
        fc = Classifier()
        if args.target:
            fc.network.test_on_file(filepath,
                                    audio_class = args.target,
                                    verbose=False)
        else:
            audio_class = fc.network.test_on_file(filepath, verbose=False)
            print("--------------------" +
                  "\nAudio class: " + audio_class)

    if args.dir:
        print("Classifier mode : directory")
        directory = args.dir
        fc = Classifier()
        if args.target:
            fc.network.test_on_dir(directory,
                                   audio_class = args.target, structured=False,
                                   verbose = False)
        else:
            fc.network.test_on_dir(directory, verbose=False)
    elif args.realtime:
        print("Classifier mode : [real-time]")
        print("Initialising PyAudio...\n")
        sc = StreamClassifier()
        sc.start_listening()
        print("\nInitialisation finished. \nListening...\n")

        realtime_wav = '../realtime/testfile.wav'
        try:
            while True:
                audio, frames = sc.record(WS_global_data.record_length)
                # Currently testing on a .wav file recorded every x seconds.
                # The raw audio signal doesn't seem to map well to the audio
                # signal read from a file in Essentia, so the feature values
                # are all in the wrong range.
                # TODO: classify the in-memory array instead, e.g. with
                # sc.network.test_on_signal(audio), as reading the array is
                # much faster than going through a file.
                WS_utils.write_audio_signal(frames, realtime_wav)
                class_label = sc.network.test_on_file(realtime_wav, verbose=False)
                sys.stdout.write("\rAudio class: (***** %s *****)" % class_label)
                sys.stdout.flush()
        except KeyboardInterrupt:
            # Ctrl-C is the only way out of the loop: end the status line
            # cleanly instead of dumping a traceback.
            sys.stdout.write("\n")