Skip to content

Commit dd0453f

Browse files
authored
Merge pull request #54 from speechmatics/feature/add-tts-sdk
tts changes
2 parents 570c888 + 6ec56a7 commit dd0453f

File tree

14 files changed

+1172
-12
lines changed

14 files changed

+1172
-12
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,6 @@ cython_debug/
171171

172172
# PyPI configuration file
173173
.pypirc
174+
175+
# Examples
176+
**/output.wav

Makefile

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# Makefile for Speechmatics Python SDKs
22

33
.PHONY: help
4-
.PHONY: test-all test-rt test-batch test-flow
5-
.PHONY: format-all format-rt format-batch format-flow
6-
.PHONY: lint-all lint-rt lint-batch lint-flow
7-
.PHONY: type-check-all type-check-rt type-check-batch type-check-flow
8-
.PHONY: build-all build-rt build-batch build-flow
9-
.PHONY: clean-all clean-rt clean-batch clean-flow clean-flow
4+
.PHONY: test-all test-rt test-batch test-flow test-tts
5+
.PHONY: format-all format-rt format-batch format-flow format-tts
6+
.PHONY: lint-all lint-rt lint-batch lint-flow lint-tts
7+
.PHONY: type-check-all type-check-rt type-check-batch type-check-flow type-check-tts
8+
.PHONY: build-all build-rt build-batch build-flow build-tts
9+
.PHONY: clean-all clean-rt clean-batch clean-flow clean-tts
1010

1111
help:
1212
@echo "Available commands:"
@@ -40,16 +40,18 @@ help:
4040
@echo " build-rt Build RT SDK"
4141
@echo " build-batch Build Batch SDK"
4242
@echo " build-flow Build Flow SDK"
43+
@echo " build-tts Build TTS SDK"
4344
@echo ""
4445
@echo "Cleaning:"
4546
@echo " clean-all Clean all SDKs"
4647
@echo " clean-rt Clean RT SDK build artifacts"
4748
@echo " clean-batch Clean Batch SDK build artifacts"
4849
@echo " clean-flow Clean Flow SDK build artifacts"
50+
@echo " clean-tts Clean TTS SDK build artifacts"
4951
@echo ""
5052

5153
# Testing targets
52-
test-all: test-rt test-batch test-flow
54+
test-all: test-rt test-batch test-flow test-tts
5355

5456
test-rt:
5557
pytest tests/rt/ -v
@@ -61,7 +63,7 @@ test-flow:
6163
pytest tests/flow/ -v

# test-all and .PHONY both list test-tts, so the rule must exist or
# `make test-all` aborts with "No rule to make target 'test-tts'".
# NOTE(review): assumes the TTS tests live in tests/tts/ like the other SDKs — confirm.
test-tts:
	pytest tests/tts/ -v
6264

6365
# Formatting targets
64-
format-all: format-rt format-batch format-flow
66+
format-all: format-rt format-batch format-flow format-tts
6567

6668
format-rt:
6769
cd sdk/rt/speechmatics && black .
@@ -75,8 +77,12 @@ format-flow:
7577
cd sdk/flow/speechmatics && black .
7678
cd sdk/flow/speechmatics && ruff check --fix .
7779

80+
format-tts:
81+
cd sdk/tts/speechmatics && black .
82+
cd sdk/tts/speechmatics && ruff check --fix .
83+
7884
# Linting targets
79-
lint-all: lint-rt lint-batch lint-flow
85+
lint-all: lint-rt lint-batch lint-flow lint-tts
8086

8187
lint-rt:
8288
cd sdk/rt/speechmatics && ruff check .
@@ -87,8 +93,11 @@ lint-batch:
8793
lint-flow:
8894
cd sdk/flow/speechmatics && ruff check .
8995

96+
lint-tts:
97+
cd sdk/tts/speechmatics && ruff check .
98+
9099
# Type checking targets
91-
type-check-all: type-check-rt type-check-batch type-check-flow
100+
type-check-all: type-check-rt type-check-batch type-check-flow type-check-tts
92101

93102
type-check-rt:
94103
cd sdk/rt/speechmatics && mypy .
@@ -99,18 +108,22 @@ type-check-batch:
99108
type-check-flow:
100109
cd sdk/flow/speechmatics && mypy .
101110

111+
type-check-tts:
112+
cd sdk/tts/speechmatics && mypy .
113+
102114
# Installation targets
103115
install-dev:
104116
python -m pip install --upgrade pip
105117
python -m pip install -e sdk/rt[dev]
106118
python -m pip install -e sdk/batch[dev]
107119
python -m pip install -e sdk/flow[dev]
120+
python -m pip install -e sdk/tts[dev]
108121

109122
install-build:
110123
python -m pip install --upgrade build
111124

112125
# Building targets
113-
build-all: build-rt build-batch build-flow
126+
build-all: build-rt build-batch build-flow build-tts
114127

115128
build-rt: install-build
116129
cd sdk/rt && python -m build
@@ -121,8 +134,11 @@ build-batch: install-build
121134
build-flow: install-build
122135
cd sdk/flow && python -m build
123136

137+
build-tts: install-build
138+
cd sdk/tts && python -m build
139+
124140
# Cleaning targets
125-
clean-all: clean-rt clean-batch clean-flow
141+
clean-all: clean-rt clean-batch clean-flow clean-tts
126142

127143
clean-rt:
128144
rm -rf sdk/rt/dist sdk/rt/build sdk/rt/*.egg-info
@@ -135,3 +151,7 @@ clean-batch:
135151
clean-flow:
136152
rm -rf sdk/flow/dist sdk/flow/build sdk/flow/*.egg-info
137153
find sdk/flow -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
154+
155+
clean-tts:
156+
rm -rf sdk/tts/dist sdk/tts/build sdk/tts/*.egg-info
157+
find sdk/tts -name __pycache__ -exec rm -rf {} + 2>/dev/null || true

examples/tts/tts_async_example.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import os
2+
import asyncio
3+
4+
import wave
5+
from pathlib import Path
6+
7+
from speechmatics.tts import AsyncClient, Voice, OutputFormat
8+
9+
10+
# Set configuration
11+
TEXT = "Welcome to the future of audio generation from text!"
12+
VOICE = Voice.SARAH
13+
OUTPUT_FORMAT = OutputFormat.RAW_PCM_16000
14+
OUTPUT_FILE = "output.wav"
15+
16+
# Set Format Parameters for WAV output file
17+
SAMPLE_RATE = 16000 #Hz
18+
SAMPLE_WIDTH = 2 # 16-bit audio
19+
CHANNELS = 1 # Mono audio
20+
21+
# Save audio to WAV file
22+
async def save_audio_to_wav(audio_data: bytes,
                            output_file_name: str) -> None:
    """Write raw PCM bytes to a WAV file.

    The WAV header is built from the module-level CHANNELS, SAMPLE_WIDTH and
    SAMPLE_RATE constants, so ``audio_data`` must already be in that layout.

    Args:
        audio_data: Raw PCM frames to store.
        output_file_name: Path of the WAV file to create or overwrite.
    """
    with wave.open(output_file_name, "wb") as wav_file:
        # Tuple order: (nchannels, sampwidth, framerate, nframes, comptype, compname).
        # nframes is left at 0; the wave module fixes it up from the data written.
        wav_file.setparams(
            (CHANNELS, SAMPLE_WIDTH, SAMPLE_RATE, 0, "NONE", "not compressed")
        )
        wav_file.writeframes(audio_data)
29+
30+
# Generate speech from text and save to WAV file
31+
async def main():
    """Generate speech for TEXT and store it in OUTPUT_FILE.

    Any exception raised while talking to the service or writing the file is
    caught and printed instead of being propagated.
    """
    print(f"Generating speech from text: {TEXT}")

    try:
        async with AsyncClient() as client:
            async with await client.generate(
                text=TEXT,
                voice=VOICE,
                output_format=OUTPUT_FORMAT,
            ) as response:
                # Collect the whole body first; the WAV writer needs it in one piece.
                pieces = [
                    piece
                    async for piece in response.content.iter_chunked(1024)
                ]
                audio_data = b''.join(pieces)

                await save_audio_to_wav(audio_data, OUTPUT_FILE)
                saved_path = Path(OUTPUT_FILE).resolve()
                print(f"Speech saved to {saved_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
52+
53+
# Run the async main function
54+
if __name__ == "__main__":
55+
asyncio.run(main())

sdk/tts/README.md

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Speechmatics TTS API Client
2+
3+
[![PyPI](https://img.shields.io/pypi/v/speechmatics-tts)](https://pypi.org/project/speechmatics-tts/)
4+
![PythonSupport](https://img.shields.io/badge/Python-3.9%2B-green)
5+
6+
Async Python client for Speechmatics TTS API.
7+
8+
## Features
9+
10+
- Async API client with comprehensive error handling
11+
- Type hints throughout for better IDE support
12+
- Environment variable support for credentials
13+
14+
## Installation
15+
16+
```bash
17+
pip install speechmatics-tts
18+
```
19+
20+
## Usage
21+
22+
### Quick Start
23+
24+
```python
25+
import asyncio
26+
27+
import wave
28+
from pathlib import Path
29+
30+
from speechmatics.tts import AsyncClient, Voice, OutputFormat
31+
32+
async def save_audio(audio_data: bytes, filename: str) -> None:
    """Write raw PCM bytes to ``filename`` as a mono, 16-bit, 16 kHz WAV file."""
    with wave.open(filename, "wb") as wav:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname)
        wav.setparams((1, 2, 16000, 0, "NONE", "not compressed"))
        wav.writeframes(audio_data)
38+
39+
# Generate speech data from text and save to WAV file
40+
async def main():
    """Generate speech for a sample sentence and save it as ``output.wav``."""
    async with AsyncClient() as client:
        async with await client.generate(
            text="Welcome to the future of audio generation from text!",
            voice=Voice.SARAH,
            output_format=OutputFormat.RAW_PCM_16000
        ) as response:
            # Collect the streamed body, then write it out in one go.
            pieces = []
            async for piece in response.content.iter_chunked(1024):
                pieces.append(piece)
            await save_audio(b''.join(pieces), "output.wav")
49+
50+
51+
# Run the async main function
52+
if __name__ == "__main__":
53+
asyncio.run(main())
54+
55+
```
56+
57+
### Error Handling
58+
59+
```python
60+
import asyncio
from speechmatics.tts import (
    AsyncClient,
    AuthenticationError,
    TimeoutError,
    TransportError,
)

async def main():
    try:
        async with AsyncClient() as client:
            # generate() results support `async with`; enter the response and
            # read the body so the request is actually consumed.
            async with await client.generate(
                text="Hello, this is the Speechmatics TTS API. We are excited to have you here!"
            ) as response:
                audio = b''.join([chunk async for chunk in response.content.iter_chunked(1024)])
                print(f"Received {len(audio)} bytes of audio")

    except AuthenticationError:
        print("Invalid API key")
    except TimeoutError as e:
        print(f"Request timed out: {e}")
    except TransportError as e:
        # NOTE(review): assumes TransportError covers request/transport failures — confirm.
        print(f"Transport error: {e}")

asyncio.run(main())
82+
```
83+
84+
### Connection Configuration
85+
86+
```python
87+
import asyncio
from speechmatics.tts import AsyncClient, ConnectionConfig

async def main():
    # Custom connection settings
    config = ConnectionConfig(
        url="https://preview.tts.speechmatics.com",
        api_key="your-api-key",
        connect_timeout=30.0,
        operation_timeout=600.0
    )

    async with AsyncClient(conn_config=config) as client:
        # Enter the response with `async with` and read the body, rather than
        # leaving the returned response unused.
        async with await client.generate(text="Hello World") as response:
            audio = b''.join([chunk async for chunk in response.content.iter_chunked(1024)])
            print(f"Received {len(audio)} bytes of audio")


asyncio.run(main())
104+
```
105+
106+
## Logging
107+
108+
The client supports logging with job id tracing for debugging. To increase logging verbosity, set `DEBUG` level in your example code:
109+
110+
```python
111+
import logging
112+
import sys
113+
114+
logging.basicConfig(
115+
level=logging.DEBUG,
116+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
117+
handlers=[
118+
logging.StreamHandler(sys.stdout)
119+
]
120+
)
121+
```
122+
123+
## Environment Variables
124+
125+
The client supports the following environment variables:
126+
127+
- `SPEECHMATICS_API_KEY`: Your Speechmatics API key
128+
- `SPEECHMATICS_TTS_URL`: Custom API endpoint URL (optional)

sdk/tts/pyproject.toml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
[build-system]
2+
requires = ["setuptools>=61.0.0"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
name = "speechmatics-tts"
7+
dynamic = ["version"]
8+
description = "Speechmatics TTS API Client"
9+
readme = "README.md"
10+
authors = [{ name = "Speechmatics", email = "[email protected]" }]
11+
license = "MIT"
12+
requires-python = ">=3.9"
13+
dependencies = ["aiohttp", "aiofiles"]
14+
classifiers = [
15+
"Development Status :: 4 - Beta",
16+
"Intended Audience :: Developers",
17+
"Programming Language :: Python :: 3",
18+
"Programming Language :: Python :: 3.9",
19+
"Programming Language :: Python :: 3.10",
20+
"Programming Language :: Python :: 3.11",
21+
"Programming Language :: Python :: 3.12",
22+
"Operating System :: OS Independent",
23+
"Topic :: Multimedia :: Sound/Audio :: Speech",
24+
"Topic :: Software Development :: Libraries :: Python Modules",
25+
]
26+
keywords = ["speechmatics", "text-to-speech", "tts", "speech-synthesis", "api"]
27+
28+
[project.optional-dependencies]
29+
dev = [
30+
"black",
31+
"ruff",
32+
"mypy",
33+
"types-aiofiles",
34+
"pre-commit",
35+
"pytest",
36+
"pytest-asyncio",
37+
"pytest-cov",
38+
"pytest-mock",
39+
"build",
40+
]
41+
42+
[project.urls]
43+
homepage = "https://github.com/speechmatics/speechmatics-python-sdk"
44+
documentation = "https://docs.speechmatics.com/"
45+
repository = "https://github.com/speechmatics/speechmatics-python-sdk"
46+
issues = "https://github.com/speechmatics/speechmatics-python-sdk/issues"
47+
48+
[tool.setuptools.dynamic]
49+
version = { attr = "speechmatics.tts.__version__" }
50+
51+
[tool.setuptools.packages.find]
52+
where = ["."]
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
__version__ = "0.0.0"
2+
3+
from ._async_client import AsyncClient
4+
from ._auth import AuthBase
5+
from ._auth import JWTAuth
6+
from ._auth import StaticKeyAuth
7+
from ._exceptions import AuthenticationError
8+
from ._exceptions import ConfigurationError
9+
from ._exceptions import ConnectionError
10+
from ._exceptions import TimeoutError
11+
from ._exceptions import TransportError
12+
from ._models import ConnectionConfig
13+
from ._models import OutputFormat
14+
from ._models import Voice
15+
16+
__all__ = [
17+
"AsyncClient",
18+
"AuthBase",
19+
"JWTAuth",
20+
"StaticKeyAuth",
21+
"ConfigurationError",
22+
"AuthenticationError",
23+
"ConnectionError",
24+
"TransportError",
25+
"TimeoutError",
26+
"ConnectionConfig",
27+
"Voice",
28+
"OutputFormat",
29+
]

0 commit comments

Comments
 (0)