whisperX to create transcripts #28

Open · wants to merge 14 commits into main
README.md (7 changes: 6 additions & 1 deletion)
@@ -48,7 +48,12 @@ If you already have a human-made SRT subtitles file for a video, this will:
- Open the script file with a text editor and change the values in the "User Settings" section at the top.
- This will label the tracks so the video file is ready to be uploaded to YouTube. HOWEVER, the multiple audio tracks feature is only available to a limited number of channels. You will most likely need to contact YouTube creator support to ask for access, but there is no guarantee they will grant it.
- **Optional:** You can use the separate `TitleTranslator.py` script if uploading to YouTube, which lets you enter a video's Title and Description, and the text will be translated into all the languages enabled in `batch.ini`. They will be placed together in a single text file in the "output" folder.

- **Optional:** You can use the separate `whisperx.py` script to transcribe a video into an SRT subtitle file (`file.srt`).
- Open the script file with a text editor and change the values in the "User Settings" section at the top.
- To use this script, you need to install WhisperX using **`pip install git+https://github.com/m-bain/whisperx.git`**
- To improve processing speed, you can run the script on an NVIDIA GPU by installing a CUDA-enabled build of PyTorch along with the CUDA and cuDNN libraries. This significantly speeds up transcription. See the PyTorch website for instructions on installing PyTorch locally with NVIDIA GPU support: https://pytorch.org/get-started/locally/. A quick way to verify the GPU setup is shown after this list.
- [**Whisperx**](https://github.com/m-bain/whisperX)
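
After installing the CUDA build of PyTorch, a quick way to confirm the GPU is actually visible (a minimal sketch; `torch.version.cuda` is `None` on CPU-only builds):

```python
# Minimal check that the installed PyTorch build can see the NVIDIA GPU.
# If this prints False, whisperx will fall back to much slower CPU inference.
import torch

print(torch.cuda.is_available())  # True if a usable CUDA device is detected
print(torch.version.cuda)         # CUDA version of the build, or None for CPU-only builds
```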

----

## Additional Notes:
requirements.txt (2 changes: 1 addition & 1 deletion)
@@ -8,4 +8,4 @@ azure-cognitiveservices-speech
langcodes
language_data
numpy
deepl
deepl
whisperx.py (57 changes: 57 additions & 0 deletions)
@@ -0,0 +1,57 @@

#========================================= USER SETTINGS ===============================================

# REMEMBER: Unlike the .ini config files, the variable values here must be surrounded by "quotation" marks

# The video can be anywhere as long as you use the full absolute filepath. Or you can use a relative path.
# This script assumes the video is an mp4 file. I'm not sure if it will work with other formats/containers.
originalVideoFile = r"folder\your_video.mp4"

# Output base folder of the video transcription
# The script will create a folder with the original video name inside the output folder
outputFolder = r"output"

# The model to use for the transcription. The default is "small.en".
# Check the list of available models at https://github.com/openai/whisper
whisperModel = r"small.en"

# The alignment model is used to improve the accuracy of the timestamps. The default is "WAV2VEC2_ASR_LARGE_LV60K_960H" for English only.
# To use other languages, see https://github.com/m-bain/whisperX
whisperXAlignModel = r"WAV2VEC2_ASR_LARGE_LV60K_960H"

#========================================================================================================

import re
import subprocess
import os

# Create the output folder based on the original video file name
VideoFileName = os.path.splitext(os.path.basename(originalVideoFile))[0]  # Strip only the extension, so dots in the name are kept
VideoFileName = re.sub(r"[^\w\s-]", "", VideoFileName)  # Remove special characters
outputFolder = f"{outputFolder}/{VideoFileName}"

# whisperx (Whisper-based Automatic Speech Recognition (ASR) with improved timestamp accuracy using forced alignment)
def transcribe(videoFile, outputFolder, Model, AlignModel):
    # Create the output folder
    if not os.path.exists(outputFolder):
        os.makedirs(outputFolder)

    # If it already exists, delete the original.wav file
    if os.path.exists(f"{outputFolder}/original.wav"):
        os.remove(f"{outputFolder}/original.wav")

    # Extract the audio from the original video to wav and save it in output/{original_video_name}
    # Paths are quoted so filenames containing spaces don't break the command
    command = f'ffmpeg -i "{videoFile}" -vn -acodec pcm_s16le -ac 1 -ar 48000 -f wav "{outputFolder}/original.wav"'
    subprocess.call(command, shell=True)

    # If you want to run whisperx in another environment, use conda envs:
    # os.system(f"conda activate {our_env} && whisperx ...")
    # Run whisperx
    os.system(f'whisperx "{outputFolder}/original.wav" --model {Model} --align_model {AlignModel} --output_dir "{outputFolder}"')

    # Verify that the transcription is done
    if os.path.exists(f"{outputFolder}/original.wav.srt"):
        print(f"Transcription completed. The output file is in {outputFolder}/original.wav.srt")
    else:
        print("Transcription failed. Check the README.md file for more information about the whisperx installation.")

transcribe(originalVideoFile, outputFolder, whisperModel, whisperXAlignModel)
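
As a sanity check before running the script, here is a minimal sketch (an addition for illustration, not part of the script itself) that verifies the two external tools the script shells out to, `ffmpeg` and the `whisperx` CLI, are discoverable on PATH:

```python
# Pre-flight check: whisperx.py shells out to ffmpeg and whisperx,
# so both executables must be on PATH for the script to work.
import shutil

for tool in ("ffmpeg", "whisperx"):
    path = shutil.which(tool)
    print(f"{tool}: {path or 'NOT FOUND'}")
```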