-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathAudio processing using Pydub
134 lines (105 loc) · 3.62 KB
/
Audio processing using Pydub
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Import necessary libraries
from pydub import AudioSegment
import speech_recognition as sr
# Split a WAV file into fixed-length, overlapping chunks, export every chunk
# as its own WAV file, and append the text that Google Speech Recognition
# finds in each chunk to "recognized.txt".

# Input audio file to be sliced
audio = AudioSegment.from_wav("1.wav")

# ---------------------------------------------------------------------------
# Step #1 - Slicing the audio file into smaller chunks.
# ---------------------------------------------------------------------------

# Length of the audio file in milliseconds
n = len(audio)

# Interval length (in milliseconds) at which to slice the audio file.
# If length is 22 seconds, and interval is 5 seconds,
# the chunks created (without overlap) will be:
# chunk1 : 0 - 5 seconds
# chunk2 : 5 - 10 seconds
# chunk3 : 10 - 15 seconds
# chunk4 : 15 - 20 seconds
# chunk5 : 20 - 22 seconds
interval = 5 * 1000

# Length of audio (in milliseconds) shared by two consecutive chunks.
# If length is 22 seconds, and interval is 5 seconds,
# with overlap as 1.5 seconds, the chunks created will be:
# chunk1 : 0 - 5 seconds
# chunk2 : 3.5 - 8.5 seconds
# chunk3 : 7 - 12 seconds
# chunk4 : 10.5 - 15.5 seconds
# chunk5 : 14 - 19 seconds
# chunk6 : 17.5 - 22 seconds
overlap = 1.5 * 1000

# Each chunk starts (interval - overlap) after the previous one, so the
# overlap has to be strictly shorter than the interval or the slicing
# would never move forward.
if overlap >= interval:
    raise ValueError("overlap must be smaller than interval")

# Number of the chunk currently being produced (used in the file names)
counter = 1

# Start and end (in milliseconds) of the current chunk
start = 0
end = 0

# One recognizer is enough for all chunks
r = sr.Recognizer()

# Text file to write the recognized audio to. The context manager closes it
# even when slicing or recognition raises an unexpected exception.
with open("recognized.txt", "w+") as fh:
    # Keep slicing until the end of the last chunk reaches the end of the
    # audio. An empty audio file produces no chunks at all.
    while end < n:
        if counter == 1:
            # First chunk: starts at 0 and is one interval long
            start = 0
            end = interval
        else:
            # Every other chunk starts `overlap` before the previous end
            # and is one interval long
            start = end - overlap
            end = start + interval

        # The last chunk is cut off at the end of the audio; this also
        # terminates the loop after the current iteration.
        if end >= n:
            end = n

        # Audio from the defined start to end
        chunk = audio[start:end]

        # Filename / path to store the sliced audio
        filename = 'chunk' + str(counter) + '.wav'

        # Store the sliced audio file at the defined path
        chunk.export(filename, format="wav")

        # Print information about the current chunk
        print(f"Processing chunk {counter}. Start = {start} end = {end}")

        # Increment counter for the next chunk
        counter += 1

        # Slicing of this chunk is done.
        # Skip the steps below if there is some other usage
        # for the sliced audio files.

        # -------------------------------------------------------------------
        # Step #2 - Recognizing the chunk and writing to a file.
        # -------------------------------------------------------------------

        # Read the complete chunk. record() consumes the whole file, whereas
        # listen() returns after the first pause in speech and would drop
        # everything that follows the pause.
        with sr.AudioFile(filename) as source:
            audio_listened = r.record(source)

        # Try to recognize the audio with Google Speech Recognition
        # and catch the expected exceptions.
        try:
            rec = r.recognize_google(audio_listened)
        except sr.UnknownValueError:
            # Google could not understand the audio
            print("Could not understand audio")
        except sr.RequestError:
            # The results cannot be requested from Google.
            # Probably an internet connection error.
            print("Could not request results.")
        else:
            # Recognized: append the text to the output file
            fh.write(rec + " ")