@@ -16,6 +16,7 @@ def __init__(self, args):
16
16
self .args = args
17
17
self .sampling_rate = 16000
18
18
self .whisper_model = None
19
+ self .quantized_model = None
19
20
self .vad_model = None
20
21
self .detect_speech = None
21
22
@@ -68,14 +69,24 @@ def _detect_voice_activity(self, audio):
68
69
def _transcribe (self , audio , speech_timestamps ):
69
70
tic = time .time ()
70
71
if self .whisper_model is None :
71
- self .whisper_model = whisper .load_model (self .args .whisper_model , self .args .device )
72
-
72
+ self .whisper_model = whisper .load_model (
73
+ self .args .whisper_model , self .args .device
74
+ )
75
+ self .quantized_model = torch .quantization .quantize_dynamic (
76
+ self .whisper_model , {torch .nn .Linear }, dtype = torch .qint8
77
+ )
73
78
res = []
74
79
# TODO: a better approach is to merge these segments into a single one, so that whisper gets more context
80
+ print (self .args .device == 'cpu' )
75
81
for seg in speech_timestamps :
76
- r = self .whisper_model .transcribe (
77
- audio [int (seg ['start' ]):int (seg ['end' ])],
78
- task = 'transcribe' , language = self .args .lang , initial_prompt = self .args .prompt )
82
+ r = whisper .transcribe (
83
+ self .quantized_model if self .args .device == 'cpu'
84
+ else self .whisper_model ,
85
+ audio = audio [int (seg ['start' ]):int (seg ['end' ])],
86
+ task = 'transcribe' ,
87
+ language = self .args .lang ,
88
+ initial_prompt = self .args .prompt
89
+ )
79
90
r ['origin_timestamp' ] = seg
80
91
res .append (r )
81
92
logging .info (f'Done transcription in { time .time () - tic :.1f} sec' )
@@ -115,7 +126,7 @@ def _save_md(self, md_fn, srt_fn, video_fn):
115
126
116
127
md = utils .MD (md_fn , self .args .encoding )
117
128
md .clear ()
118
- md .add_done_edditing (False )
129
+ md .add_done_editing (False )
119
130
md .add_video (os .path .basename (video_fn ))
120
131
md .add (f'\n Texts generated from [{ os .path .basename (srt_fn )} ]({ os .path .basename (srt_fn )} ).'
121
132
'Mark the sentences to keep for autocut.\n '
0 commit comments