-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvoice_detection.py
41 lines (31 loc) · 1.4 KB
/
voice_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# import whisper
# model = whisper.load_model("base")
# result = model.transcribe("peppa pig.mp4")
# print(result["text"])
import whisper
def estimate_timestamps(text, total_duration):
    """Split *text* into sentences and give each an equal share of the duration.

    The timestamps are estimates only: the total duration is divided evenly
    across the sentences, regardless of how long each sentence actually is.

    Args:
        text: Transcript text. Sentences are delimited by a period followed
            by a space (``'. '``).
        total_duration: Length of the recording, in seconds.

    Returns:
        A list of dicts, one per sentence, in order, each with the keys
        ``"sentence"``, ``"timestamp_start"`` and ``"timestamp_end"``.
        Returns an empty list when *text* contains no sentences.
    """
    # Strip before splitting so surrounding whitespace (e.g. "One. Two. ")
    # does not yield an empty trailing piece, then drop any empty pieces so
    # they do not consume a share of the duration.
    pieces = [piece.strip() for piece in text.strip().split('. ')]
    sentences = [piece for piece in pieces if piece]
    if not sentences:
        return []

    duration_per_sentence = total_duration / len(sentences)
    return [
        {
            # The split removes the period from every sentence except the
            # last, so only add one where terminal punctuation is missing;
            # this avoids outputs such as "World.." or "Really?.".
            "sentence": (
                sentence if sentence.endswith(('.', '!', '?')) else sentence + "."
            ),
            "timestamp_start": index * duration_per_sentence,
            "timestamp_end": (index + 1) * duration_per_sentence,
        }
        for index, sentence in enumerate(sentences)
    ]
# Transcribe the video with Whisper and write one
# "<start>s to <end>s: <sentence>" line per sentence to transcription.txt.
try:
    # Load model and transcribe video
    model = whisper.load_model("base")
    result = model.transcribe("peppa pig.mp4")

    # Total duration in seconds. Whisper's result includes per-segment
    # start/end times, so use the end of the last segment (where the last
    # transcribed speech stops) instead of a guessed value. The 300 s example
    # value is kept only as a fallback for when no segments are returned.
    segments = result.get("segments") or []
    total_duration = segments[-1]["end"] if segments else 300

    # Get the estimated timestamps
    transcript = estimate_timestamps(result['text'], total_duration)

    # Save to a file. The encoding is explicit so non-ASCII characters in the
    # transcript are written the same way regardless of the platform default.
    with open("transcription.txt", "w", encoding="utf-8") as file:
        for item in transcript:
            sentence = item["sentence"]
            start_time, end_time = item["timestamp_start"], item["timestamp_end"]
            file.write(f"{start_time}s to {end_time}s: {sentence}\n")
except FileNotFoundError:
    print("The specified file could not be found.")
except Exception as e:
    # Top-level boundary of the script: report the failure instead of a traceback.
    print(f"An unexpected error occurred: {str(e)}")