-
Notifications
You must be signed in to change notification settings - Fork 0
/
subtool.py
executable file
·124 lines (102 loc) · 4.1 KB
/
subtool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
from __future__ import unicode_literals, print_function
import argparse
import ffmpeg
import logging
import sys
from vosk import Model, KaldiRecognizer, SetLogLevel
import os
import wave, json
from datetime import datetime
import contextlib
import codecs
from os.path import abspath
from pathlib import Path
# Vosk log verbosity. NOTE(review): 0 is presumably the library default
# level -- confirm against the Vosk documentation.
SetLogLevel(0)

# get_transcripts() loads the recognizer model from ./model, so fail fast
# with download instructions when that directory is missing.
if not os.path.exists("model"):
    print("Please download the model from https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
    # sys.exit() rather than the site-provided exit() helper, which is meant
    # for the interactive shell and is absent when `site` is not loaded.
    sys.exit(1)

logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__file__)
logger.setLevel(logging.INFO)

parser = argparse.ArgumentParser(
    description='Add subtitles to video by feeding audio to vosk and resulting text into (soft-)subtitled mkv. (And extract words into json for further processing)'
)
# The argument is resolved to an absolute filesystem path and sibling
# .wav/.ass/.json/.mkv files are derived from it, so stdin ("-") is not
# supported; the help text no longer advertises it.
parser.add_argument('in_filename', help='Path of the input video file')
def decode_audio(in_filename, out_filename, **input_kwargs):
    """Decode the audio of a media file into a mono 16-bit PCM WAV file.

    ffmpeg is driven through ffmpeg-python; the output is written with
    ``acodec='pcm_s16le'``, one channel and a 44100 Hz sample rate, and an
    existing output file is overwritten.

    Args:
        in_filename: Path (or string) of the media file to read.
        out_filename: Path (or string) of the WAV file to create.
        **input_kwargs: Extra options forwarded to ``ffmpeg.input``.

    Returns:
        ``out_filename`` exactly as it was passed in.

    Exits the process with status 1 when ffmpeg reports an error, after
    echoing ffmpeg's stderr output to this process's stderr.
    """
    try:
        (
            ffmpeg
            .input(str(in_filename), **input_kwargs)
            .output(str(out_filename), acodec='pcm_s16le', ac=1, ar='44100')
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # The captured stderr is bytes; decode it so the diagnostics are
        # printed as readable text instead of a b'...' repr.
        message = e.stderr
        if isinstance(message, bytes):
            message = message.decode('utf-8', errors='replace')
        print(message, file=sys.stderr)
        sys.exit(1)
    return out_filename
def insert_subs(in_filename, subs_filename):
    """Mux a subtitle file into a copy of the video as ``<name>-subbed.mkv``.

    Streams are copied (``codec='copy'``), not re-encoded, and an existing
    output file is overwritten. The ffmpeg argument list is printed before
    the command runs.

    Args:
        in_filename: ``pathlib.Path`` of the source video (``with_suffix``
            is called on it).
        subs_filename: Path of the subtitle file to add.

    Returns:
        The stdout captured from the ffmpeg run.

    Exits the process with status 1 when ffmpeg reports an error, after
    echoing ffmpeg's stderr output to this process's stderr.
    """
    out_filename = str(in_filename.with_suffix('')) + "-subbed"
    try:
        # The video is handed over as an ``i`` option of the subtitle input.
        # NOTE(review): presumably this makes ffmpeg-python emit a second
        # ``-i`` so both files become inputs of one command -- confirm with
        # the printed argument list.
        stream = (
            ffmpeg
            .input(subs_filename, i=in_filename)
            .output(out_filename + ".mkv", codec='copy')
            .overwrite_output()
        )
        # An MP4 target was previously sketched here using the 'mov_text'
        # subtitle codec and sub_charenc='UTF-8'; only MKV output is produced.
        print(" ".join(stream.get_args()))
        out, _ = stream.run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        # The captured stderr is bytes; decode it so the diagnostics are
        # printed as readable text instead of a b'...' repr.
        message = e.stderr
        if isinstance(message, bytes):
            message = message.decode('utf-8', errors='replace')
        print(message, file=sys.stderr)
        sys.exit(1)
    return out
def convtime(seconds):
    """Format an offset in seconds as a subtitle timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest microsecond and then truncated to
    milliseconds. Unlike a wall-clock conversion, the hour field does not
    wrap at 24, so offsets of a day or more stay correct. Negative offsets
    are clamped to zero.

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float).

    Returns:
        The formatted timestamp string, e.g. ``"01:01:01,500"``.
    """
    total_us = max(0, int(round(seconds * 1000000)))
    total_ms = total_us // 1000
    total_s, millis = divmod(total_ms, 1000)
    total_min, secs = divmod(total_s, 60)
    hours, mins = divmod(total_min, 60)
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, mins, secs, millis)
def get_transcripts(video_filename, audio_filename):
    """Transcribe a WAV file with Vosk and attach the subtitles to the video.

    Three things are produced next to ``video_filename``:

    * ``<name>.ass`` -- one cue per recognized utterance,
    * ``<name>.json`` -- the concatenated per-word entries from the
      recognizer's ``"result"`` lists,
    * the subtitled video, via ``insert_subs``.

    NOTE(review): cues are written in an SRT-like layout (index line,
    ``start --> end`` line, text, blank line) although the file carries an
    ``.ass`` suffix -- confirm ffmpeg's format probing copes with that.

    Args:
        video_filename: ``pathlib.Path`` of the video; output names are
            derived from it with ``with_suffix``.
        audio_filename: Path of the mono, 16-bit, uncompressed WAV file to
            transcribe.

    Returns:
        None.

    Exits the process with status 1 when the WAV file is not mono 16-bit
    uncompressed PCM.
    """
    print("transcribing")
    debug = False
    sub_filename = str(video_filename.with_suffix('.ass'))
    json_filename = str(video_filename.with_suffix('.json'))
    words = []

    # `with` guarantees the wave handle is closed on every exit path.
    with wave.open(str(audio_filename), "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            print("Audio file must be WAV format mono PCM.")
            sys.exit(1)
        rate = wf.getframerate()
        duration = wf.getnframes() / float(rate)

        model = Model("model")
        rec = KaldiRecognizer(model, rate)
        rec.SetWords(True)

        # The subtitle file is created only after the audio was validated
        # and the model loaded, so an early failure does not leave an empty
        # .ass file behind that transcribe() would later reuse.
        with codecs.open(sub_filename, "w", "utf-8-sig") as subs:
            cue_index = 0

            def write_cue(segment, index):
                """Record one recognizer result; return the next cue index."""
                if "text" in segment:
                    print(segment["text"])
                timed_words = segment.get("result")
                if not timed_words:
                    # Nothing recognized in this segment: no cue to write.
                    return index
                words.extend(timed_words)
                start = timed_words[0]["start"]
                end = timed_words[-1]["end"]
                print('{0:.3f}% in audio time'.format((float(end) / duration) * 100), end='\r')
                subs.write(str(index) + "\n")
                subs.write(convtime(start) + " --> " + convtime(end) + "\n")
                subs.write(segment["text"] + "\n\n")
                return index + 1

            while True:
                chunk = wf.readframes(100)
                if len(chunk) == 0:
                    print("too few data")
                    break
                if rec.AcceptWaveform(chunk):
                    cue_index = write_cue(json.loads(rec.Result()), cue_index)
                elif debug:
                    print(rec.PartialResult())

            # Flush whatever the recognizer still buffers at end of input;
            # without this the last utterance is silently dropped.
            cue_index = write_cue(json.loads(rec.FinalResult()), cue_index)

    with open(json_filename, 'w') as f:
        json.dump(words, f)
    insert_subs(video_filename, sub_filename)
def transcribe(video_filename):
    """Produce a subtitled copy of ``video_filename``.

    When a sibling ``.ass`` file already exists it is muxed in directly;
    otherwise the audio is decoded to a sibling ``.wav`` file and handed to
    ``get_transcripts``.

    Args:
        video_filename: ``pathlib.Path`` of the video to process.
    """
    if not video_filename.with_suffix('.ass').is_file():
        wav_target = video_filename.with_suffix('.wav')
        audio_filename = decode_audio(video_filename, wav_target)
        get_transcripts(video_filename, audio_filename)
        return

    print("Reusing existing subtitles.")
    cached_subs = video_filename.with_suffix('.ass')
    insert_subs(video_filename, str(cached_subs))
if __name__ == '__main__':
    # Script entry point: resolve the CLI argument to an absolute Path and
    # run the subtitle pipeline on it.
    cli_args = parser.parse_args()
    source_video = Path(abspath(cli_args.in_filename))
    transcribe(source_video)