Add benchmarks (#417)

chidiwilliams · Apr 25, 2023 · 84ab53d · 84ab53d
1 parent 158aa0b
commit 84ab53d
Show file tree

Hide file tree

Showing 8 changed files with 169 additions and 3 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -178,6 +178,79 @@ jobs:
             dist/Buzz*-mac.dmg
             dist/Buzz*-unix.tar.gz
 
+  benchmark:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: macos-latest
+          - os: windows-latest
+          - os: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10.7'
+
+      - name: Install Poetry Action
+        uses: snok/install-poetry@v1.3.1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+
+      - name: Load cached venv
+        id: cached-poetry-dependencies
+        uses: actions/cache@v3
+        with:
+          path: .venv
+          key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-2
+
+      - name: Load cached Whisper models
+        id: cached-whisper-models
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/Library/Caches/Buzz
+            ~/.cache/whisper
+            ~/.cache/huggingface
+            ~/AppData/Local/Buzz/Buzz/Cache
+          key: whisper-models-${{ runner.os }}
+
+      - uses: FedericoCarboni/setup-ffmpeg@v1
+        id: setup-ffmpeg
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Install dependencies
+        run: poetry config experimental.new-installer false && poetry install
+
+      - name: Test
+        run: |
+          if [ "$RUNNER_OS" == "Linux" ]; then
+            # Refresh the package index before installing anything so apt does not
+            # resolve against stale lists; -y keeps every install non-interactive.
+            sudo apt update
+            sudo apt install -y libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-xfixes0 x11-utils
+            # Start a virtual X display (:99) in the background.
+            /sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1920x1200x24 -ac +extension GLX
+            sudo apt install -y libpulse-mainloop-glib0 libegl1-mesa-dev libgstreamer-plugins-base1.0-dev libgstreamer1.0-dev libportaudio2
+          fi
+
+          poetry run make benchmarks
+        shell: bash
+
+      - name: Store benchmark results
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: ${{ runner.os }}
+          tool: 'pytest'
+          output-file-path: benchmarks.json
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          comment-on-alert: true
+          summary-always: true
+          auto-push: true
+
   release:
     runs-on: ${{ matrix.os }}
     strategy:

diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,5 @@ whisper_cpp.py
 
 # Internationalization - compiled binaries
 locale/**/*.mo
+
+benchmarks.json
diff --git a/Makefile b/Makefile
@@ -38,7 +38,10 @@ clean:
 	rm -rf dist/* || true
 
 test: buzz/whisper_cpp.py translation_mo
-	pytest -vv --cov=buzz --cov-report=xml --cov-report=html
+	pytest -vv --cov=buzz --cov-report=xml --cov-report=html --benchmark-skip
+
+benchmarks: buzz/whisper_cpp.py translation_mo
+	pytest -vv --benchmark-only --benchmark-json benchmarks.json
 
 dist/Buzz dist/Buzz.app: buzz/whisper_cpp.py translation_mo
 	pyinstaller --noconfirm Buzz.spec

diff --git a/buzz/gui.py b/buzz/gui.py
@@ -1371,7 +1371,7 @@ def __init__(self, shortcuts: Dict[str, str], openai_api_key: str, parent: QWidg
         self.import_action.triggered.connect(
             self.on_import_action_triggered)
 
-        about_action = QAction(f'{_("About...")} {APP_NAME}', self)
+        about_action = QAction(f'{_("About")} {APP_NAME}', self)
         about_action.triggered.connect(self.on_about_action_triggered)
 
         self.preferences_action = QAction(_("Preferences..."), self)

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -32,6 +32,7 @@ pytest-xvfb = "^2.0.0"
 pylint = "^2.15.5"
 pre-commit = "^2.20.0"
 ctypesgen = "^1.1.1"
+pytest-benchmark = "^4.0.0"
 
 [build-system]
 requires = ["poetry-core"]

diff --git a/testdata/audio-long.mp3 b/testdata/audio-long.mp3
diff --git a/tests/transcriber_benchmarks_test.py b/tests/transcriber_benchmarks_test.py
@@ -0,0 +1,54 @@
+import platform
+from unittest.mock import Mock
+
+import pytest
+
+from buzz.model_loader import WhisperModelSize, ModelType, TranscriptionModel
+from buzz.transcriber import (FileTranscriptionOptions, FileTranscriptionTask, Task, WhisperCppFileTranscriber,
+                              TranscriptionOptions, WhisperFileTranscriber, FileTranscriber)
+from tests.model_loader import get_model_path
+
+
+def get_task(model: TranscriptionModel):
+    file_transcription_options = FileTranscriptionOptions(
+        file_paths=['testdata/whisper-french.mp3'])
+    transcription_options = TranscriptionOptions(language='fr', task=Task.TRANSCRIBE,
+                                                 word_level_timings=False,
+                                                 model=model)
+    model_path = get_model_path(transcription_options.model)
+    return FileTranscriptionTask(file_path='testdata/audio-long.mp3', transcription_options=transcription_options,
+                                 file_transcription_options=file_transcription_options, model_path=model_path)
+
+
+def transcribe(qtbot, transcriber: FileTranscriber):
+    mock_completed = Mock()
+    transcriber.completed.connect(mock_completed)
+    with qtbot.waitSignal(transcriber.completed, timeout=10 * 60 * 1000):
+        transcriber.run()
+
+    segments = mock_completed.call_args[0][0]
+    return segments
+
+
+@pytest.mark.parametrize(
+    'transcriber',
+    [
+        pytest.param(
+            WhisperCppFileTranscriber(task=(get_task(
+                TranscriptionModel(model_type=ModelType.WHISPER_CPP, whisper_model_size=WhisperModelSize.TINY)))),
+            id="Whisper.cpp - Tiny"),
+        pytest.param(
+            WhisperFileTranscriber(task=(get_task(
+                TranscriptionModel(model_type=ModelType.WHISPER, whisper_model_size=WhisperModelSize.TINY)))),
+            id="Whisper - Tiny"),
+        pytest.param(
+            WhisperFileTranscriber(task=(get_task(
+                TranscriptionModel(model_type=ModelType.FASTER_WHISPER, whisper_model_size=WhisperModelSize.TINY)))),
+            id="Faster Whisper - Tiny",
+            marks=pytest.mark.skipif(platform.system() == 'Darwin',
+                                     reason='Error with libiomp5 already initialized on GH action runner: https://github.com/chidiwilliams/buzz/actions/runs/4657331262/jobs/8241832087')
+        ),
+    ])
+def test_should_transcribe_and_benchmark(qtbot, benchmark, transcriber):
+    segments = benchmark(transcribe, qtbot, transcriber)
+    assert len(segments) > 0
Original file line number	Diff line number	Diff line change
Expand Up		@@ -18,3 +18,5 @@ whisper_cpp.py

		# Internationalization - compiled binaries
		locale/**/*.mo

		benchmarks.json