livekit · darshankparmar · Jan 7, 2026 · Jan 7, 2026 · Jan 24, 2026 · Jan 24, 2026
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -93,7 +93,8 @@ uv pip install pip && uv run mypy --install-types --non-interactive \
     -p livekit.plugins.sarvam \
     -p livekit.plugins.inworld \
     -p livekit.plugins.simli \
-    -p livekit.plugins.anam
+    -p livekit.plugins.anam \
+    -p livekit.plugins.vosk
 ```
 
 ### Linting

diff --git a/livekit-agents/pyproject.toml b/livekit-agents/pyproject.toml
@@ -108,7 +108,7 @@ ultravox = ["livekit-plugins-ultravox>=1.3.12"]
 upliftai = ["livekit-plugins-upliftai>=1.3.12"]
 gradium = ["livekit-plugins-gradium>=1.3.12"]
 xai = ["livekit-plugins-xai>=1.3.12"]
-
+vosk = ["livekit-plugins-vosk>=1.3.12"]
 
 [project.urls]
 Documentation = "https://docs.livekit.io"

diff --git a/livekit-plugins/livekit-plugins-vosk/README.md b/livekit-plugins/livekit-plugins-vosk/README.md
@@ -0,0 +1,202 @@
+# Vosk Plugin for LiveKit Agents
+
+Offline speech-to-text plugin using [Vosk](https://alphacephei.com/vosk/) for the LiveKit Agents framework.
+
+## Features
+
+- **Offline Processing**: Runs entirely locally without internet connection
+- **No API Keys Required**: Completely free, no cloud service costs
+- **Multi-language Support**: 20+ languages including English, Spanish, French, German, Chinese, Russian, and more
+- **Streaming Recognition**: Real-time transcription with interim results
+- **Word-level Timestamps**: Precise timing information for each word
+- **Speaker Diarization**: Optional speaker identification (requires speaker model)
+- **Privacy-focused**: All processing happens on your device
+
+## Installation
+
+```bash
+pip install livekit-plugins-vosk
+```
+
+## Download Models
+
+Vosk requires pre-downloaded models. Download from: [Vosk models](https://alphacephei.com/vosk/models)
+
+### Quick Start - Small English Model (~40MB)
+
+```bash
+mkdir -p ~/.cache/vosk/models
+cd ~/.cache/vosk/models
+wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
+unzip vosk-model-small-en-us-0.15.zip
+```
+
+### Available Models
+
+- **English**: `vosk-model-en-us-0.22` (large), `vosk-model-small-en-us-0.15` (small)
+- **Spanish**: `vosk-model-es-0.42`
+- **French**: `vosk-model-fr-0.22`
+- **German**: `vosk-model-de-0.21`
+- **Chinese**: `vosk-model-cn-0.22`
+- **Russian**: `vosk-model-ru-0.42`
+- **And many more...**
+
+See the [full model list](https://alphacephei.com/vosk/models).
+
+## Usage
+
+### Basic Example
+
+```python
+from livekit.agents import JobContext, cli, WorkerOptions
+from livekit.plugins import vosk
+import os
+
+async def entrypoint(ctx: JobContext):
+    await ctx.connect()
+
+    # Path to your downloaded Vosk model
+    model_path = os.path.expanduser("~/.cache/vosk/models/vosk-model-small-en-us-0.15")
+
+    # Create STT instance
+    stt_instance = vosk.STT(
+        model_path=model_path,
+        language="en",
+        sample_rate=16000,
+        enable_words=True,
+    )
+
+    # Use in streaming mode
+    stream = stt_instance.stream()
+
+    # Process audio frames...
+    # stream.push_frame(audio_frame)
+
+    # Get transcription events
+    async for event in stream:
+        if event.type == "final_transcript":
+            print(f"Final: {event.alternatives[0].text}")
+        elif event.type == "interim_transcript":
+            print(f"Interim: {event.alternatives[0].text}")
+
+if __name__ == "__main__":
+    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
+```
+
+### With AgentSession
+
+```python
+from livekit.agents import Agent, AgentSession, JobContext, cli, WorkerOptions
+from livekit.plugins import vosk, silero
+import os
+
+async def entrypoint(ctx: JobContext):
+    await ctx.connect()
+
+    model_path = os.path.expanduser("~/.cache/vosk/models/vosk-model-en-us-0.22")
+
+    agent = Agent(
+        instructions="You are a helpful voice assistant.",
+    )
+
+    session = AgentSession(
+        vad=silero.VAD.load(),
+        stt=vosk.STT(
+            model_path=model_path,
+            language="en",
+            enable_words=True,
+        ),
+        llm="openai/gpt-4o",
+        tts="cartesia/sonic-2",
+    )
+
+    await session.start(agent=agent, room=ctx.room)
+    await session.generate_reply(instructions="greet the user")
+
+if __name__ == "__main__":
+    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
+```
+
+### With Speaker Diarization
+
+```python
+stt_instance = vosk.STT(
+    model_path="/path/to/vosk-model-en-us-0.22",
+    speaker_model_path="/path/to/vosk-model-spk-0.4",
+    language="en",
+    enable_words=True,
+)
+```
+
+## Configuration Options
+
+### STT Constructor
+
+- **`model_path`** (required): Path to the Vosk model directory
+- **`language`** (default: `"en"`): Language code for metadata
+- **`sample_rate`** (default: `16000`): Audio sample rate in Hz
+- **`enable_words`** (default: `True`): Include word-level timestamps
+- **`max_alternatives`** (default: `0`): Number of alternative transcriptions (0 = disabled)
+- **`speaker_model_path`** (optional): Path to speaker identification model
+
+## Supported Languages
+
+Vosk supports 20+ languages:
+
+- English (US, Indian)
+- Spanish
+- French
+- German
+- Italian
+- Portuguese
+- Chinese
+- Russian
+- Japanese
+- Turkish
+- Vietnamese
+- Dutch
+- Catalan
+- Arabic
+- Greek
+- Farsi
+- Filipino
+- Ukrainian
+- Kazakh
+- Swedish
+- And more...
+
+See the [Vosk models list](https://alphacephei.com/vosk/models) for the complete list.
+
+## Performance Tips
+
+1. **Model Size**: Smaller models (~50MB) are faster but less accurate. Larger models (~1GB) provide better accuracy.
+2. **Sample Rate**: Vosk works best with 16kHz audio. The plugin automatically resamples if needed.
+3. **CPU Usage**: Vosk runs on CPU. For production, use a server with adequate CPU resources.
+4. **Memory**: Load models once and reuse them across multiple streams to save memory. The plugin automatically caches loaded models globally.
+5. **Prewarming**: Call `stt_instance.prewarm()` at startup to load models into memory before the first request, reducing initial latency.
+
+## Advantages
+
+- ✅ **Privacy**: All processing is local, no data sent to cloud
+- ✅ **Cost**: Completely free, no API fees
+- ✅ **Latency**: Lower latency without network round-trip
+- ✅ **Reliability**: Works offline, no internet dependency
+- ✅ **Compliance**: Easier to meet data residency requirements
+
+## Limitations
+
+- Requires pre-downloaded models (50MB - 1GB)
+- Accuracy may be lower than latest cloud models for some languages
+- Requires local compute resources (CPU/memory)
+- Model updates require manual download
+
+## License
+
+Apache 2.0
+
+## Links
+
+- [Vosk Website](https://alphacephei.com/vosk/)
+- [Vosk GitHub](https://github.com/alphacep/vosk-api)
+- [Vosk Models](https://alphacephei.com/vosk/models)
+- [LiveKit Agents](https://docs.livekit.io/agents/)
diff --git a/livekit-plugins/livekit-plugins-vosk/livekit/plugins/vosk/__init__.py b/livekit-plugins/livekit-plugins-vosk/livekit/plugins/vosk/__init__.py
@@ -0,0 +1,45 @@
+# Copyright 2025 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Vosk plugin for LiveKit Agents
+
+Offline speech-to-text with [Vosk](https://alphacephei.com/vosk/).
+"""
+
+from .stt import STT, SpeechStream
+from .version import __version__
+
+__all__ = ["STT", "SpeechStream", "__version__"]
+
+
+from livekit.agents import Plugin
+
+from .log import logger
+
+
+class VoskPlugin(Plugin):
+    """Plugin descriptor for the Vosk integration.
+
+    Passes this package's module name, version, package name and logger to
+    the ``Plugin`` base class.
+    """
+
+    def __init__(self) -> None:
+        super().__init__(__name__, __version__, __package__, logger)
+
+
+# Register one plugin instance as a side effect of importing this package.
+Plugin.register_plugin(VoskPlugin())
+
+# Cleanup docs of unexported modules
+# Every module-level name that is not listed in __all__ is mapped to False in
+# __pdoc__ (presumably so that pdoc leaves it out of the generated docs).
+# dir() is snapshotted first, so names defined below it are not included.
+_module = dir()
+NOT_IN_ALL = [m for m in _module if m not in __all__]
+
+__pdoc__ = {}
+
+for n in NOT_IN_ALL:
+    __pdoc__[n] = False
diff --git a/livekit-plugins/livekit-plugins-vosk/livekit/plugins/vosk/log.py b/livekit-plugins/livekit-plugins-vosk/livekit/plugins/vosk/log.py
@@ -0,0 +1,17 @@
+# Copyright 2025 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+# Module-level logger for the plugin; __init__.py imports it and hands it to
+# the Plugin base class.
+# NOTE(review): __name__ here is this submodule's dotted path (ending in
+# ".vosk.log"), not the package name - confirm that is the intended logger name.
+logger = logging.getLogger(__name__)
diff --git a/livekit-plugins/livekit-plugins-vosk/livekit/plugins/vosk/models.py b/livekit-plugins/livekit-plugins-vosk/livekit/plugins/vosk/models.py
@@ -0,0 +1,87 @@
+# Copyright 2025 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Vosk model management utilities."""
+
+from pathlib import Path
+from typing import Union
+
+
+class VoskModels:
+    """
+    Pre-defined Vosk model identifiers.
+
+    Each attribute is the directory/archive name of a model, not a filesystem
+    path; nothing in this class downloads or locates the model.
+
+    Download models from: https://alphacephei.com/vosk/models
+    """
+
+    # English models
+    EN_US_SMALL = "vosk-model-small-en-us-0.15"
+    EN_US = "vosk-model-en-us-0.22"
+    # NOTE(review): the README lists "vosk-model-en-us-0.22" as the large
+    # English model; confirm the "-lgraph" variant is what "LARGE" should mean.
+    EN_US_LARGE = "vosk-model-en-us-0.22-lgraph"
+
+    # Other languages
+    # NOTE(review): verify each identifier against the published model list;
+    # PT and TR in particular may only be published under a "small" name.
+    CN = "vosk-model-cn-0.22"
+    DE = "vosk-model-de-0.21"
+    ES = "vosk-model-es-0.42"
+    FR = "vosk-model-fr-0.22"
+    IT = "vosk-model-it-0.22"
+    JA = "vosk-model-ja-0.22"
+    PT = "vosk-model-pt-0.3"
+    RU = "vosk-model-ru-0.42"
+    TR = "vosk-model-tr-0.3"
+    VI = "vosk-model-vi-0.4"
+
+    # Speaker identification model
+    SPEAKER_MODEL = "vosk-model-spk-0.4"
+
+
+# Per-user model directory (~/.cache/vosk/models), the same location the
+# README quick start downloads models into.
+DEFAULT_MODEL_DIR = Path.home() / ".cache" / "vosk" / "models"
+
+
+def validate_model_path(model_path: str | Path) -> Path:
+    """
-def validate_model_path(model_path: str | Path) -> Path:
-    """
+def validate_model_path(model_path: Union[str, Path]) -> Path:
+    """
-def validate_model_path(model_path: str | Path) -> Path:
-    """
+def validate_model_path(model_path: Union[str, Path]) -> Path:
+    """
+    Validate that a model path exists and contains required files.
+
+    Args:
+        model_path: Path to Vosk model directory
+
+    Returns:
+        Validated Path object
+
+    Raises:
+        FileNotFoundError: If model path doesn't exist or is invalid
+    """
+    path = Path(model_path)
+
+    if not path.exists():
+        raise FileNotFoundError(
+            f"Model path does not exist: {path}\n"
+            f"Download models from: https://alphacephei.com/vosk/models"
+        )
+
+    if not path.is_dir():
+        raise FileNotFoundError(f"Model path must be a directory: {path}")
+
+    # Check for required model files
+    # Note: Structure varies between small and large models
+    # Large models have graph/HCLG.fst, small models might have different graph files
+    if not (path / "am/final.mdl").exists():
+        raise FileNotFoundError(
+            f"Model directory is missing 'am/final.mdl': {path}\n"
+            f"This is required for all Vosk models."
+        )
+
+    if not (path / "conf/model.conf").exists():
+        raise FileNotFoundError(f"Model directory is missing 'conf/model.conf': {path}")
+
+    return path
diff --git a/livekit-plugins/livekit-plugins-vosk/livekit/plugins/vosk/py.typed b/livekit-plugins/livekit-plugins-vosk/livekit/plugins/vosk/py.typed