File tree: 10 files changed, +124 −0 lines changed
PocketSphinx is an open-source API for converting speech to text. It is a lightweight speech-recognition engine; although it also works well on the desktop, it has been tuned specifically for handheld and mobile devices.
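As a rough illustration (assuming the SpeechRecognition and pocketsphinx packages are installed, and reusing the demo WAV produced by the scripts below), PocketSphinx can be driven through SpeechRecognition's recognize_sphinx for fully offline decoding:

import speech_recognition as sr

r = sr.Recognizer()
# Load a recorded WAV file and decode it locally with PocketSphinx
with sr.AudioFile('src/demo_audio.wav') as source:
    audio = r.record(source)
try:
    print(r.recognize_sphinx(audio))
except sr.UnknownValueError:
    print('PocketSphinx could not understand the audio')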
Speaking text aloud through the Windows SAPI voice with pywin32:

from win32com.client import Dispatch

# Get the SAPI voice object
speaker = Dispatch('SAPI.SpVoice')
# Speak the given text
speaker.Speak('Hello!')
speaker.Speak('Did you sleep well?')
# Release the COM object
del speaker
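The SpVoice object also exposes the installed voices and the speaking rate; a small sketch using the SAPI automation methods (GetVoices, GetDescription) and the Rate property:

from win32com.client import Dispatch

speaker = Dispatch('SAPI.SpVoice')
# List every installed SAPI voice
for voice in speaker.GetVoices():
    print(voice.GetDescription())
# Rate runs from -10 (slowest) to 10 (fastest)
speaker.Rate = 2
speaker.Speak('Testing the adjusted speaking rate.')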
Saving synthesized speech to a WAV file with comtypes and the SAPI file stream:

from comtypes.client import CreateObject

# Creating the voice object also generates comtypes.gen.SpeechLib on first run
engine = CreateObject('SAPI.SpVoice')
from comtypes.gen import SpeechLib

# File stream that will receive the audio output
stream = CreateObject('SAPI.SpFileStream')
infile = 'src/demo.txt'
outfile = 'src/demo_audio.wav'
# Open the stream for writing
stream.Open(outfile, SpeechLib.SSFMCreateForWrite)
# Route the voice's output into the file stream
engine.AudioOutputStream = stream

# Read the text to be spoken
f = open(infile, 'r', encoding='utf-8')
theText = f.read()
f.close()

# Speak the text into the file, then close the stream
engine.Speak(theText)
stream.Close()
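To spot-check the generated file, the standard-library winsound module (Windows only, like SAPI itself) can play it back:

import winsound

# Blocks until playback of the synthesized file finishes
winsound.PlaySound('src/demo_audio.wav', winsound.SND_FILENAME)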
Text-to-speech with pyttsx3:

import pyttsx3 as pyttsx

# Initialize the engine and get a speaking object
engine = pyttsx.init()
engine.say('November 11, 2022')
engine.runAndWait()
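pyttsx3 also exposes speaking rate, volume, and voice selection through its documented getProperty/setProperty interface; a short sketch:

import pyttsx3 as pyttsx

engine = pyttsx.init()
# Rate is in words per minute; slow down from the platform default
engine.setProperty('rate', engine.getProperty('rate') - 50)
# Volume is a float in [0.0, 1.0]
engine.setProperty('volume', 0.8)
# Switch to the first installed voice
voices = engine.getProperty('voices')
engine.setProperty('voice', voices[0].id)
engine.say('Testing the adjusted voice settings.')
engine.runAndWait()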
Loading and visualizing the recorded audio signal:

import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile

frequency_sampling, audio_signal = wavfile.read("src/demo_audio.wav")

print('\nSignal shape:', audio_signal.shape)
print('Signal Datatype:', audio_signal.dtype)
print('Signal duration:', round(audio_signal.shape[0] /
      float(frequency_sampling), 2), 'seconds')

# Normalize 16-bit samples to [-1, 1]
audio_signal = audio_signal / np.power(2, 15)
# Plot only the first 100 samples
audio_signal = audio_signal[:100]
time_axis = 1000 * np.arange(0, len(audio_signal), 1) / float(frequency_sampling)

plt.plot(time_axis, audio_signal, color='blue')
plt.xlabel('Time (milliseconds)')
plt.ylabel('Amplitude')
plt.title('Input audio signal')
plt.show()
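The normalization above assumes a mono 16-bit file; for a stereo WAV, wavfile.read returns an array of shape (samples, channels). A small guard that can sit right after the wavfile.read call:

# Collapse a stereo recording to mono before normalizing
if audio_signal.ndim == 2:
    audio_signal = audio_signal.mean(axis=1)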
Recognizing live microphone speech with the Google Web Speech API:

import speech_recognition as sr

recording = sr.Recognizer()

with sr.Microphone() as source:
    recording.adjust_for_ambient_noise(source)
    print("Please say something:")
    audio = recording.listen(source)

try:
    print("You said:\n" + recording.recognize_google(audio))
except Exception as e:
    print(e)
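recognize_google defaults to US English; since the earlier scripts synthesize Chinese speech, the language parameter can be passed, and SpeechRecognition's own exception classes separate unintelligible audio from network failures. A variant of the try block above:

try:
    print("You said:\n" + recording.recognize_google(audio, language='zh-CN'))
except sr.UnknownValueError:
    print("Could not understand the audio")
except sr.RequestError as e:
    print("Could not reach the recognition service:", e)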
Generating a sine tone and writing it to a WAV file:

import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import write

output_file = 'audio_signal_generated.wav'
duration = 4                # in seconds
frequency_sampling = 44100  # in Hz
frequency_tone = 784        # tone frequency in Hz
min_val = -4 * np.pi
max_val = 4 * np.pi

# Generate the sine tone
t = np.linspace(min_val, max_val, duration * frequency_sampling)
audio_signal = np.sin(2 * np.pi * frequency_tone * t)

# Scale to 16-bit integers before writing the WAV file
signal_scaled = np.int16(audio_signal / np.max(np.abs(audio_signal)) * 32767)
write(output_file, frequency_sampling, signal_scaled)

# Plot only the first 100 samples
audio_signal = audio_signal[:100]
time_axis = 1000 * np.arange(0, len(audio_signal), 1) / float(frequency_sampling)

plt.plot(time_axis, audio_signal, color='blue')
plt.xlabel('Time in milliseconds')
plt.ylabel('Amplitude')
plt.title('Generated audio signal')
plt.show()
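The explicit int16 conversion matters because scipy.io.wavfile.write infers the output sample format from the array dtype; passing the raw float array would produce a float WAV rather than the usual 16-bit PCM. Note also that t above spans -4π to 4π rather than real time, so the tone in the file does not come out at 784 Hz; a variant that samples over actual seconds (same names as above):

# Sample over real time so the written tone is frequency_tone Hz
t = np.linspace(0, duration, duration * frequency_sampling, endpoint=False)
audio_signal = np.sin(2 * np.pi * frequency_tone * t)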
Characterizing the signal: transforming it to the frequency domain and plotting the power spectrum:

import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile

frequency_sampling, audio_signal = wavfile.read("src/demo_audio.wav")

print('\nSignal shape:', audio_signal.shape)
print('Signal Datatype:', audio_signal.dtype)
print('Signal duration:', round(audio_signal.shape[0] /
      float(frequency_sampling), 2), 'seconds')

# Normalize 16-bit samples to [-1, 1]
audio_signal = audio_signal / np.power(2, 15)
length_signal = len(audio_signal)
half_length = np.ceil((length_signal + 1) / 2.0).astype(int)

# One-sided power spectrum from the full FFT
signal_frequency = np.fft.fft(audio_signal)
signal_frequency = abs(signal_frequency[0:half_length]) / length_signal
signal_frequency **= 2
len_fts = len(signal_frequency)

# Double every bin except DC (and Nyquist for even-length signals)
if length_signal % 2:
    signal_frequency[1:len_fts] *= 2
else:
    signal_frequency[1:len_fts - 1] *= 2

signal_power = 10 * np.log10(signal_frequency)
x_axis = np.arange(0, half_length, 1) * (frequency_sampling / length_signal) / 1000.0

plt.figure()
plt.plot(x_axis, signal_power, color='black')
plt.xlabel('Frequency (kHz)')
plt.ylabel('Signal power (dB)')
plt.show()
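Because the input signal is real-valued, np.fft.rfft returns the one-sided spectrum directly, which avoids the manual slicing above; a compact equivalent for the magnitude part (reusing audio_signal, length_signal, and frequency_sampling):

# One-sided amplitude spectrum and its frequency axis in kHz
spectrum = np.abs(np.fft.rfft(audio_signal)) / length_signal
freqs_khz = np.fft.rfftfreq(length_signal, d=1.0 / frequency_sampling) / 1000.0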