From 9d7795015c3f4ac76103759f562a3219ca998fb7 Mon Sep 17 00:00:00 2001
From: ftnext <takuyafjp+develop@gmail.com>
Date: Sat, 17 May 2025 12:36:45 +0900
Subject: [PATCH 1/5] feat: Download with vosk model

---
 setup_vosk.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 setup_vosk.py

diff --git a/setup_vosk.py b/setup_vosk.py
new file mode 100644
index 00000000..c72de546
--- /dev/null
+++ b/setup_vosk.py
@@ -0,0 +1,50 @@
+# /// script
+# requires-python = ">=3.9"
+# dependencies = [
+#     "requests",
+#     "tqdm",
+# ]
+# ///
+import os
+import shutil
+import tempfile
+import zipfile
+
+import requests
+from tqdm import tqdm
+
+
+def setup_vosk_model(model_url: str, model_dir: str) -> None:
+    model_filename = os.path.basename(model_url)
+    model_name = os.path.splitext(model_filename)[0]
+
+    print(f"Downloading model {model_filename} ...")
+    response = requests.get(model_url, stream=True)
+    total_size = int(response.headers.get("content-length", 0))
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        download_path = os.path.join(temp_dir, model_filename)
+        with open(download_path, "wb") as f:
+            with tqdm(total=total_size, unit="B", unit_scale=True) as pbar:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+                        pbar.update(len(chunk))
+
+        print("Unzip model...")
+        with zipfile.ZipFile(download_path, "r") as zip_ref:
+            zip_ref.extractall(temp_dir)
+
+        extracted_dir = os.path.join(temp_dir, model_name)
+        if os.path.exists(model_dir):
+            shutil.rmtree(model_dir)
+        shutil.copytree(extracted_dir, model_dir)
+
+    print(f"Setup complete! Model is placed in the directory: {model_dir}")
+
+
+if __name__ == "__main__":
+    model_url = (
+        "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"
+    )
+    setup_vosk_model(model_url, "model")

From 1d37d8b6d5978e357525fd758c3771a2be9e6cd6 Mon Sep 17 00:00:00 2001
From: ftnext <takuyafjp+develop@gmail.com>
Date: Sat, 17 May 2025 12:39:16 +0900
Subject: [PATCH 2/5] chore: vosk extra

---
 setup.cfg | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/setup.cfg b/setup.cfg
index 23dc597b..a6dac587 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -23,3 +23,5 @@ groq =
     httpx < 0.28
 assemblyai =
     requests
+vosk =
+    vosk

From c70f9e7d62a06790bc79e529ee5eaf6f6543927d Mon Sep 17 00:00:00 2001
From: ftnext <takuyafjp+develop@gmail.com>
Date: Sat, 17 May 2025 12:52:56 +0900
Subject: [PATCH 3/5] test: Developers can test recognize_vosk() in cloned
 repository

---
 tests/recognizers/test_vosk.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 tests/recognizers/test_vosk.py

diff --git a/tests/recognizers/test_vosk.py b/tests/recognizers/test_vosk.py
new file mode 100644
index 00000000..9a8e79d8
--- /dev/null
+++ b/tests/recognizers/test_vosk.py
@@ -0,0 +1,18 @@
+from pathlib import Path
+
+from speech_recognition import AudioData, Recognizer
+
+
+def test_recognize_vosk():
+    audio_file = str(Path(__file__).parent.parent / "english.wav")
+    audio_data = AudioData.from_file(audio_file)
+    sut = Recognizer()
+
+    actual = sut.recognize_vosk(audio_data)
+
+    expected = """\
+{
+  "text" : "one two three"
+}\
+"""
+    assert actual == expected

From 93086ff5876ceb6c797d0537e43ab25f43a4aa3d Mon Sep 17 00:00:00 2001
From: ftnext <takuyafjp+develop@gmail.com>
Date: Sat, 17 May 2025 12:53:18 +0900
Subject: [PATCH 4/5] chore: Add vosk test in CI

---
 .github/workflows/unittests.yml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index ed5fe9cb..4456c978 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -46,16 +46,18 @@ jobs:
       - name: Install Python dependencies (Ubuntu, <=3.12)
         if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.13'
         run: |
-          python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq]
+          python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq,vosk]
       - name: Install Python dependencies (Ubuntu, 3.13)
         if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
         run: |
           python -m pip install standard-aifc setuptools
-          python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,google-cloud,openai,groq]
+          python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,google-cloud,openai,groq,vosk]
       - name: Install Python dependencies (Windows)
         if: matrix.os == 'windows-latest'
         run: |
-          python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq]
+          python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq,vosk]
+      - name: Set up vosk model
+        run: pipx run setup_vosk.py
       - name: Test with unittest
         run: |
           pytest --doctest-modules -v speech_recognition/recognizers/ tests/

From 898aef17c4d608a634d9939a53fb9d9f35e98873 Mon Sep 17 00:00:00 2001
From: nikkie <takuyafjp+develop@gmail.com>
Date: Sat, 17 May 2025 13:03:03 +0900
Subject: [PATCH 5/5] feat: Handle potential download errors (thanks Copilot
 review)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 setup_vosk.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup_vosk.py b/setup_vosk.py
index c72de546..b8b5877d 100644
--- a/setup_vosk.py
+++ b/setup_vosk.py
@@ -20,6 +20,7 @@ def setup_vosk_model(model_url: str, model_dir: str) -> None:
 
     print(f"Downloading model {model_filename} ...")
     response = requests.get(model_url, stream=True)
+    response.raise_for_status()
     total_size = int(response.headers.get("content-length", 0))
 
     with tempfile.TemporaryDirectory() as temp_dir: