Skip to content

Commit 0de0b4d

Browse files
author
Zhou Caifa
committed
fix: mli#37
feat: quantization
1 parent 69b6a39 commit 0de0b4d

File tree

1 file changed

+17
-6
lines changed

1 file changed

+17
-6
lines changed

autocut/transcribe.py

+17 -6
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ def __init__(self, args):
1616
self.args = args
1717
self.sampling_rate = 16000
1818
self.whisper_model = None
19+
self.quantized_model = None
1920
self.vad_model = None
2021
self.detect_speech = None
2122

@@ -68,14 +69,24 @@ def _detect_voice_activity(self, audio):
6869
def _transcribe(self, audio, speech_timestamps):
6970
tic = time.time()
7071
if self.whisper_model is None:
71-
self.whisper_model = whisper.load_model(self.args.whisper_model, self.args.device)
72-
72+
self.whisper_model = whisper.load_model(
73+
self.args.whisper_model, self.args.device
74+
)
75+
self.quantized_model = torch.quantization.quantize_dynamic(
76+
self.whisper_model, {torch.nn.Linear}, dtype=torch.qint8
77+
)
7378
res = []
7479
# TODO, a better way is merging these segments into a single one, so whisper can get more context
80+
print(self.args.device == 'cpu')
7581
for seg in speech_timestamps:
76-
r = self.whisper_model.transcribe(
77-
audio[int(seg['start']):int(seg['end'])],
78-
task='transcribe', language=self.args.lang, initial_prompt=self.args.prompt)
82+
r = whisper.transcribe(
83+
self.quantized_model if self.args.device == 'cpu'
84+
else self.whisper_model,
85+
audio = audio[int(seg['start']):int(seg['end'])],
86+
task='transcribe',
87+
language=self.args.lang,
88+
initial_prompt=self.args.prompt
89+
)
7990
r['origin_timestamp'] = seg
8091
res.append(r)
8192
logging.info(f'Done transcription in {time.time() - tic:.1f} sec')
@@ -115,7 +126,7 @@ def _save_md(self, md_fn, srt_fn, video_fn):
115126

116127
md = utils.MD(md_fn, self.args.encoding)
117128
md.clear()
118-
md.add_done_edditing(False)
129+
md.add_done_editing(False)
119130
md.add_video(os.path.basename(video_fn))
120131
md.add(f'\nTexts generated from [{os.path.basename(srt_fn)}]({os.path.basename(srt_fn)}).'
121132
'Mark the sentences to keep for autocut.\n'

0 commit comments

Comments
 (0)