
Commit 3822ad7

1 parent 3ee02ee commit 3822ad7

10 files changed (+124, −0 lines)

demo_audio.wav

Whitespace-only changes.

src/demo.txt

+1
@@ -0,0 +1 @@
PocketSphinx is an open-source API for converting speech to text. It is a lightweight speech recognition engine that, although it also works well on the desktop, has been tuned specifically for handheld and mobile devices.
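Since the sample text introduces PocketSphinx, a minimal offline-recognition sketch may be useful here. It is not part of the commit: it assumes the pocketsphinx package is installed so that speech_recognition can use it as a backend, and it reuses src/demo_audio.wav from this commit. Note that the default Sphinx models are US English, so Chinese audio would need a Chinese acoustic model.

import speech_recognition as sr

recognizer = sr.Recognizer()
# Load the WAV file shipped with this commit
with sr.AudioFile('src/demo_audio.wav') as source:
    audio = recognizer.record(source)

# Offline decoding via PocketSphinx (pip install pocketsphinx)
try:
    print('Sphinx heard: ' + recognizer.recognize_sphinx(audio))
except sr.UnknownValueError:
    print('Sphinx could not understand the audio')
except sr.RequestError as e:
    print('Sphinx error: {}'.format(e))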

src/demo_audio.wav

722 KB
Binary file not shown.

src/sapi.py

+8
@@ -0,0 +1,8 @@
from win32com.client import Dispatch

# Get the SAPI text-to-speech COM object
speaker = Dispatch('SAPI.SpVoice')
# Speak the given text
speaker.Speak('你好!')        # "Hello!"
speaker.Speak('睡得还好吗?')  # "Did you sleep well?"
# Release the COM object
del speaker
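The SpVoice object also exposes speaking rate, volume, and a flags argument on Speak. A small sketch, assuming the standard SAPI 5 automation properties (none of this is in the commit):

from win32com.client import Dispatch

speaker = Dispatch('SAPI.SpVoice')
speaker.Rate = 2      # speaking rate, roughly -10 (slow) to 10 (fast)
speaker.Volume = 80   # volume, 0 to 100
# SVSFlagsAsync = 1: return immediately instead of blocking until speech finishes
speaker.Speak('你好!', 1)
speaker.WaitUntilDone(-1)  # block until the async utterance completes (-1 = no timeout)
del speaker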

src/speechlib.py

+22
@@ -0,0 +1,22 @@
from comtypes.client import CreateObject

# Create the speech engine first: comtypes generates the SpeechLib wrapper
# module from the SAPI type library the first time the object is created,
# so importing comtypes.gen.SpeechLib before this line fails on a clean cache.
engine = CreateObject('SAPI.SpVoice')
from comtypes.gen import SpeechLib

# File stream that will receive the synthesized audio
stream = CreateObject('SAPI.SpFileStream')
infile = 'src/demo.txt'
outfile = 'src/demo_audio.wav'
# Open the output file for writing
stream.Open(outfile, SpeechLib.SSFMCreateForWrite)
# Route the voice's audio output into the file stream
engine.AudioOutputStream = stream

# Read the text to be spoken
f = open(infile, 'r', encoding='utf-8')
theText = f.read()
f.close()

# Synthesize the text into the WAV file, then close the stream
engine.Speak(theText)
stream.Close()
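A quick way to confirm the synthesis worked is to read the file back; a sketch using scipy, which later files in this commit already depend on:

from scipy.io import wavfile

# Sanity check on the file speechlib.py just produced
rate, data = wavfile.read('src/demo_audio.wav')
print('sample rate:', rate, 'Hz')
print('duration:', round(data.shape[0] / float(rate), 2), 'seconds')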

src/tts.py

+5
@@ -0,0 +1,5 @@
import pyttsx3 as pyttsx

# Call the init method to get a speaker object
engine = pyttsx.init()
engine.say('2022年11月11日')  # "November 11, 2022"
engine.runAndWait()
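pyttsx3 also lets you tune the voice before speaking, via its getProperty/setProperty API; a minimal sketch (which voices are listed depends on the platform):

import pyttsx3

engine = pyttsx3.init()
rate = engine.getProperty('rate')           # current words-per-minute setting
engine.setProperty('rate', rate - 50)       # speak a bit slower
engine.setProperty('volume', 0.8)           # volume in the range 0.0 to 1.0
for voice in engine.getProperty('voices'):  # voices installed on this machine
    print(voice.id, voice.name)
engine.say('2022年11月11日')
engine.runAndWait()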

src/可视化音频信号.py (visualize the audio signal)

+20
@@ -0,0 +1,20 @@
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile

frequency_sampling, audio_signal = wavfile.read("src/demo_audio.wav")

print('\nSignal shape:', audio_signal.shape)
print('Signal Datatype:', audio_signal.dtype)
print('Signal duration:', round(audio_signal.shape[0] /
      float(frequency_sampling), 2), 'seconds')

# Normalize 16-bit samples to [-1, 1], then keep the first 100 samples
audio_signal = audio_signal / np.power(2, 15)
audio_signal = audio_signal[:100]
time_axis = 1000 * np.arange(0, len(audio_signal), 1) / float(frequency_sampling)

plt.plot(time_axis, audio_signal, color='blue')
plt.xlabel('Time (milliseconds)')
plt.ylabel('Amplitude')
plt.title('Input audio signal')
plt.show()
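One caveat: wavfile.read returns a 2-D array for stereo files, and the 2**15 normalization assumes int16 samples. A hedged guard, assuming you only want the first channel:

import numpy as np
from scipy.io import wavfile

frequency_sampling, audio_signal = wavfile.read("src/demo_audio.wav")
# Stereo files come back as (n_samples, n_channels); keep the first channel
if audio_signal.ndim > 1:
    audio_signal = audio_signal[:, 0]
# int16 samples normalize to [-1, 1); other dtypes would need a different divisor
audio_signal = audio_signal / np.power(2, 15)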

src/特征提取.py (feature extraction)

+13
@@ -0,0 +1,13 @@
import speech_recognition as sr

recording = sr.Recognizer()

# Capture one utterance from the default microphone
with sr.Microphone() as source:
    recording.adjust_for_ambient_noise(source)
    print("Please say something:")
    audio = recording.listen(source)

# Send the audio to the Google Web Speech API and print the transcript
try:
    print("You said: \n" + recording.recognize_google(audio))
except Exception as e:
    print(e)
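Since the other files in this commit work with Chinese text, note that recognize_google accepts a language tag; a small sketch, assuming Mandarin input:

import speech_recognition as sr

recording = sr.Recognizer()
with sr.Microphone() as source:
    recording.adjust_for_ambient_noise(source)
    audio = recording.listen(source)

# Ask the Google Web Speech API for Mandarin Chinese instead of the default en-US
try:
    print(recording.recognize_google(audio, language='zh-CN'))
except sr.UnknownValueError:
    print('Could not understand the audio')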

src/生成单调音频信号.py (generate a monotone audio signal)

+23
@@ -0,0 +1,23 @@
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import write

output_file = 'audio_signal_generated.wav'
duration = 4                # in seconds
frequency_sampling = 44100  # in Hz
frequency_tone = 784
min_val = -4 * np.pi
max_val = 4 * np.pi

# Generate the sine tone, then scale it to 16-bit range before writing the WAV
t = np.linspace(min_val, max_val, duration * frequency_sampling)
audio_signal = np.sin(2 * np.pi * frequency_tone * t)
signal_scaled = np.int16(audio_signal / np.max(np.abs(audio_signal)) * 32767)
write(output_file, frequency_sampling, signal_scaled)

# Plot the first 100 samples
audio_signal = audio_signal[:100]
time_axis = 1000 * np.arange(0, len(audio_signal), 1) / float(frequency_sampling)

plt.plot(time_axis, audio_signal, color='blue')
plt.xlabel('Time in milliseconds')
plt.ylabel('Amplitude')
plt.title('Generated audio signal')
plt.show()
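One thing worth noting: t above spans -4π to 4π rather than 0 to duration seconds, so the written tone is not actually 784 Hz (the axis covers 8π "units" in 4 seconds, stretching the frequency by a factor of 2π). A sketch with the time axis in seconds, if a true 784 Hz (G5) tone is wanted:

import numpy as np
from scipy.io.wavfile import write

duration = 4
frequency_sampling = 44100
frequency_tone = 784  # G5

# A time axis in seconds makes frequency_tone come out in real hertz
t = np.linspace(0., duration, duration * frequency_sampling)
audio_signal = np.sin(2 * np.pi * frequency_tone * t)
signal_scaled = np.int16(audio_signal / np.max(np.abs(audio_signal)) * 32767)
write('audio_signal_784hz.wav', frequency_sampling, signal_scaled)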

src/表征音频信号.py (characterize the audio signal)

+32
@@ -0,0 +1,32 @@
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile

frequency_sampling, audio_signal = wavfile.read("src/demo_audio.wav")

print('\nSignal shape:', audio_signal.shape)
print('Signal Datatype:', audio_signal.dtype)
print('Signal duration:', round(audio_signal.shape[0] /
      float(frequency_sampling), 2), 'seconds')

# Normalize 16-bit samples to [-1, 1]
audio_signal = audio_signal / np.power(2, 15)
length_signal = len(audio_signal)
half_length = np.ceil((length_signal + 1) / 2.0).astype(int)

# One-sided power spectrum (np.int is removed in recent NumPy, so use int)
signal_frequency = np.fft.fft(audio_signal)
signal_frequency = abs(signal_frequency[0:half_length]) / length_signal
signal_frequency **= 2
len_fts = len(signal_frequency)

# Double the non-DC bins to account for the discarded negative frequencies
if length_signal % 2:
    signal_frequency[1:len_fts] *= 2
else:
    signal_frequency[1:len_fts-1] *= 2

# Power in decibels, frequency axis in kHz
signal_power = 10 * np.log10(signal_frequency)
x_axis = np.arange(0, half_length, 1) * (frequency_sampling / length_signal) / 1000.0

plt.figure()
plt.plot(x_axis, signal_power, color='black')
plt.xlabel('Frequency (kHz)')
plt.ylabel('Signal power (dB)')
plt.show()
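Given the 特征提取.py filename elsewhere in this commit, a frequency-domain plot is often followed by proper feature extraction. A minimal MFCC sketch, assuming the third-party python_speech_features package is installed (not part of this commit):

from python_speech_features import mfcc, logfbank
from scipy.io import wavfile

frequency_sampling, audio_signal = wavfile.read("src/demo_audio.wav")

# MFCC and log filter-bank features over the first 15000 samples
features_mfcc = mfcc(audio_signal[:15000], frequency_sampling)
features_fb = logfbank(audio_signal[:15000], frequency_sampling)
print('MFCC: number of windows =', features_mfcc.shape[0])
print('Filter bank: number of windows =', features_fb.shape[0])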
