801 add large v2 v3 models #803

Merged: 3 commits, Jun 18, 2024
buzz/model_loader.py: 22 changes (18 additions, 4 deletions)
@@ -43,10 +43,17 @@

```python
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    LARGEV2 = "large-v2"
    LARGEV3 = "large-v3"

    def to_faster_whisper_model_size(self) -> str:
        if self == WhisperModelSize.LARGE:
            return "large-v1"
        return self.value

    def to_whisper_cpp_model_size(self) -> str:
        if self == WhisperModelSize.LARGE:
            return "large-v1"
        return self.value
```

Codecov / codecov/patch warns that the added lines 51 and 56 in buzz/model_loader.py (the `return "large-v1"` branches) are not covered by tests.
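The mapping above can be exercised in isolation. The sketch below is a standalone reimplementation of the enum for illustration (it reproduces the code in this hunk so it runs without buzz installed; it is not an import of the project's module):

```python
import enum

class WhisperModelSize(enum.Enum):
    # Reimplementation of the enum in buzz/model_loader.py, for illustration only
    TINY = "tiny"
    BASE = "base"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    LARGEV2 = "large-v2"
    LARGEV3 = "large-v3"

    def to_whisper_cpp_model_size(self) -> str:
        # The legacy "large" option maps to whisper.cpp's "large-v1" checkpoint;
        # every other member's value is already a valid whisper.cpp size name.
        if self == WhisperModelSize.LARGE:
            return "large-v1"
        return self.value

print(WhisperModelSize.LARGE.to_whisper_cpp_model_size())    # large-v1
print(WhisperModelSize.LARGEV3.to_whisper_cpp_model_size())  # large-v3
```

This shows why the `run()` change further down switches from `.value` to `to_whisper_cpp_model_size()`: for `LARGE`, the raw value `"large"` would no longer name the intended checkpoint.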

def __str__(self):
@@ -201,7 +208,9 @@

```python
    "base": "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe",
    "small": "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b",
    "medium": "6c14d5adee5f86394037b4e4e8b59f1673b6cee10e3cf0b11bbdbee79c156208",
    "large-v1": "7d99f41a10525d0206bddadd86760181fa920438b6b33237e3118ff6c83bb53d",
    "large-v2": "9a423fe4d40c82774b6af34115b8b935f34152246eb19e80e376071d3f999487",
    "large-v3": "64d182b440b98d5203c4f9bd541544d84c605196c4f7b845dfa11fb23594d1e2",
}
```
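A table of SHA-256 digests like the one above is typically used to verify a downloaded model file before use. A generic verification helper might look like this (hashlib is in the standard library; the helper name is ours, not buzz's):

```python
import hashlib

def sha256_of(path: str) -> str:
    # Stream the file in 1 MiB chunks so multi-gigabyte model files
    # are never loaded into memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()
```

Usage would be comparing `sha256_of(model_path)` against the expected entry, e.g. the `"large-v2"` digest, and re-downloading on mismatch.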


@@ -318,7 +327,12 @@

```python
                % (size, ", ".join(faster_whisper.utils._MODELS))
            )

        logging.debug("Downloading Faster Whisper model: %s", size)

        if size == WhisperModelSize.LARGEV3:
            repo_id = "Systran/faster-whisper-large-v3"
        else:
            repo_id = "guillaumekln/faster-whisper-%s" % size

        allow_patterns = [
            "model.bin",  # largest by size first
```
@@ -357,7 +371,7 @@

```python
    def run(self) -> None:
        if self.model.model_type == ModelType.WHISPER_CPP:
            model_name = self.model.whisper_model_size.to_whisper_cpp_model_size()
            url = huggingface_hub.hf_hub_url(
                repo_id="ggerganov/whisper.cpp",
                filename=f"ggml-{model_name}.bin",
```
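The `hf_hub_url` call above builds a direct download URL for the whisper.cpp checkpoint. Its effect can be sketched without the huggingface_hub dependency (this is our own helper mirroring the library's `resolve/main` URL layout, not the library itself):

```python
def hf_hub_url(repo_id: str, filename: str) -> str:
    # Mirrors the URL pattern produced by huggingface_hub.hf_hub_url
    # for files on a repo's main branch.
    return f"https://huggingface.co/{repo_id}/resolve/main/{filename}"

print(hf_hub_url("ggerganov/whisper.cpp", "ggml-large-v1.bin"))
# https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v1.bin
```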
docs/docs/faq.md: 8 changes (7 additions, 1 deletion)

@@ -19,4 +19,10 @@ sidebar_position: 5
Relevant tools:
- Mac OS - [BlackHole](https://github.com/ExistentialAudio/BlackHole).
- Windows - [VB CABLE](https://vb-audio.com/Cable/)
- Linux - [PulseAudio Volume Control](https://wiki.ubuntu.com/record_system_sound)

4. **What model should I use?**

   The model size to use depends on your hardware and use case. Smaller models run faster but make more mistakes; larger models are more accurate but need more powerful hardware and take longer to transcribe.

   When choosing among the large models, consider the following: "Large" is the first, oldest release; "Large-V2" is a later update with better accuracy, considered the most robust and stable for some languages; "Large-V3" is the latest model with the best accuracy in many cases, but it can sometimes hallucinate words that were never in the audio. The only sure way to know which model best suits your needs is to test them all on your language.
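The trade-off described above can be made concrete with a rough selection rule. The function, thresholds, and tiers below are our own illustration (rules of thumb, not part of Buzz or the Whisper documentation):

```python
def suggest_model(vram_gb: float, prefer_accuracy: bool) -> str:
    # Rough rule of thumb: smaller models fit modest hardware,
    # while the large checkpoints want roughly 10 GB of VRAM for GPU inference.
    if vram_gb < 2:
        return "tiny"
    if vram_gb < 5:
        return "small"
    if vram_gb < 10:
        return "medium"
    # large-v3 for best accuracy, large-v2 when stability matters more
    return "large-v3" if prefer_accuracy else "large-v2"
```

In practice you would still benchmark the candidate sizes on a sample recording in your own language, as the FAQ advises.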