ThioJoe · JvSdv · Jan 2, 2023 · Jan 3, 2023 · Jan 3, 2023 · Jan 3, 2023
diff --git a/README.md b/README.md
@@ -47,6 +47,7 @@ If you already have a human-made SRT subtitles file for a video, this will:
    - Open the script file with a text editor and change the values in the "User Settings" section at the top.
    - This will label the tracks so the video file is ready to be uploaded to YouTube. HOWEVER, the multiple audio tracks feature is only available to a limited number of channels. You will most likely need to contact YouTube creator support to ask for access, but there is no guarantee they will grant it.
 - **Optional:** You can use the separate `TitleTranslator.py` script if uploading to YouTube, which lets you enter a video's Title and Description, and the text will be translated into all the languages enabled in `batch.ini`. They will be placed together in a single text file in the "output" folder.
+- **Optional:** You can use the separate `whisperx.py` script to create a transcription (.srt) of your English-language video, using the video file path set in the `batch.ini` configuration file. For other languages, see [**WhisperX**](https://github.com/m-bain/whisperX). To speed up processing, use PyTorch (https://pytorch.org/get-started/locally/) with an NVIDIA graphics card.
 
 ----
 

diff --git a/audio_builder.py b/audio_builder.py
@@ -1,3 +1,4 @@
+import re
 import soundfile
 import pyrubberband
 import configparser
@@ -26,6 +27,12 @@
 cloudConfig = configparser.ConfigParser()
 cloudConfig.read('cloud_service_settings.ini')
 
+# Get the video file name and create the output folder based on the original video file name
+originalVideoFile = os.path.abspath(batchConfig['SETTINGS']['original_video_file_path'].strip("\""))
+fileName = os.path.basename(originalVideoFile).split(".")[0]
+fileName = re.sub(r"[^\w\s-]", "", fileName)
+outputFolder = outputFolder + "/" + fileName
+
 # Get variables from configs
 nativeSampleRate = int(config['SETTINGS']['synth_sample_rate'])
 originalVideoFile = os.path.abspath(batchConfig['SETTINGS']['original_video_file_path'].strip("\""))

diff --git a/main.py b/main.py
@@ -72,6 +72,11 @@
 originalVideoFile = os.path.abspath(batchConfig['SETTINGS']['original_video_file_path'].strip("\""))
 srtFile = os.path.abspath(batchConfig['SETTINGS']['srt_file_path'].strip("\""))
 
+# Create the output folder based on the original video file name
+fileName = os.path.basename(originalVideoFile).split(".")[0]
+fileName = re.sub(r"[^\w\s-]", "", fileName)
+outputFolder = outputFolder + "/" + fileName
+
 # Validate the number of sections
 for num in languageNums:
     # Check if section exists

diff --git a/requirements.txt b/requirements.txt
@@ -8,3 +8,4 @@ azure-cognitiveservices-speech
 langcodes
 language_data
 numpy
+git+https://github.com/m-bain/whisperx.git
diff --git a/whisperx.py b/whisperx.py
@@ -0,0 +1,36 @@
+import re
+import subprocess
+import os
+import configparser
+
+#---------------------------------------- Batch File Processing ----------------------------------------
+# Load the batch settings from 'batch.ini' (a relative path, so it is resolved against the current working directory)
+batchConfig = configparser.ConfigParser()
+batchConfig.read('batch.ini')
+
+# MOVE THIS INTO A VARIABLE AT SOME POINT
+# Base output folder passed to transcribe() below
+outputFolder = "output"
+
+# Absolute path of the video to transcribe, read from batch.ini with any surrounding double quotes stripped
+originalVideoFile = os.path.abspath(batchConfig['SETTINGS']['original_video_file_path'].strip("\""))
+
+#whisperx (Whisper-Based Automatic Speech Recognition (ASR) with improved timestamp accuracy using forced alignment)
+def transcribe(videoFile, output):
+    """Extract the audio track of a video and transcribe it with whisperx.
+
+    The audio is written as ``original.wav`` into a sub-folder of ``output``
+    named after the video file, and whisperx is told to write its results
+    into that same sub-folder.
+
+    Args:
+        videoFile: Path to the source video file.
+        output: Base output folder; the per-video sub-folder is created
+            inside it if it does not exist yet.
+
+    Returns:
+        None. Problems (missing executable, failed audio extraction) are
+        reported on stdout and end the function early.
+    """
+    # Folder name = video file name up to its first ".", with special
+    # characters removed. This derivation is kept unchanged so the folder
+    # stays the same one main.py and audio_builder.py compute.
+    fileName = os.path.basename(videoFile).split(".")[0]
+    fileName = re.sub(r"[^\w\s-]", "", fileName)
+    outputFolder = output + "/" + fileName
+    audioFile = outputFolder + "/original.wav"
+
+    # Create the output folder (no error if it already exists)
+    os.makedirs(outputFolder, exist_ok=True)
+
+    # Extract the audio as mono 48 kHz 16-bit PCM wav.
+    # Arguments are passed as a list (no shell) so that paths containing
+    # spaces or shell metacharacters reach ffmpeg unmodified.
+    try:
+        subprocess.call([
+            "ffmpeg", "-i", videoFile, "-vn", "-acodec", "pcm_s16le",
+            "-ac", "1", "-ar", "48000", "-f", "wav", audioFile,
+        ])
+    except FileNotFoundError:
+        print("Could not run ffmpeg: executable not found on PATH.")
+        return
+
+    # Without the extracted audio there is nothing for whisperx to process
+    if not os.path.exists(audioFile):
+        print(f"Audio extraction failed: {audioFile} was not created.")
+        return
+
+    # Run whisperx. If whisperx is installed in a separate conda environment,
+    # activate that environment before launching this script.
+    try:
+        subprocess.call([
+            "whisperx", audioFile,
+            "--model", "small.en",
+            "--align_model", "WAV2VEC2_ASR_LARGE_LV60K_960H",
+            "--output_dir", outputFolder,
+        ])
+    except FileNotFoundError:
+        print("Could not run whisperx: executable not found on PATH.")
+
+# Transcribe the video configured in batch.ini, writing results under the base output folder
+transcribe(originalVideoFile, outputFolder)