Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
# Header names and default API version string for the Cartesia API.
# NOTE(review): the two header names are not used in this excerpt — presumably
# sent on HTTP requests; confirm against the rest of the module.
API_AUTH_HEADER = "X-API-Key"
API_VERSION_HEADER = "Cartesia-Version"
API_VERSION = "2025-04-16"
# API version / model snapshot pair that must both be selected for the speed and
# emotion controls; any other combination logs a warning when either control is set.
API_VERSION_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS = "2024-11-13"
MODEL_ID_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS = "sonic-2-2025-03-07"


@dataclass
Expand All @@ -64,6 +66,7 @@ class _TTSOptions:
api_key: str
language: str
base_url: str
api_version: str

def get_http_url(self, path: str) -> str:
return f"{self.base_url}{path}"
Expand All @@ -89,6 +92,7 @@ def __init__(
tokenizer: NotGivenOr[tokenize.SentenceTokenizer] = NOT_GIVEN,
text_pacing: tts.SentenceStreamPacer | bool = False,
base_url: str = "https://api.cartesia.ai",
api_version: str = API_VERSION_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should default to the non-deprecated version; we have to follow the API deprecations announced by the model providers.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be a separate issue for updating the third-party API version, and it should be highlighted in the LiveKit changelog, because some users might still be using the old version.

) -> None:
"""
Create a new instance of Cartesia TTS.
Expand Down Expand Up @@ -123,12 +127,16 @@ def __init__(
if not cartesia_api_key:
raise ValueError("CARTESIA_API_KEY must be set")

if (speed or emotion) and model != "sonic-2-2025-03-07":
logger.warning(
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', "
"see https://docs.cartesia.ai/developer-tools/changelog for details",
extra={"model": model, "speed": speed, "emotion": emotion},
)
if speed or emotion:
if (
api_version != API_VERSION_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS
or model != MODEL_ID_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS
):
logger.warning(
f"speed and emotion controls are only supported for model '{MODEL_ID_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS}', and API version '{API_VERSION_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS}', "
"see https://docs.cartesia.ai/developer-tools/changelog for details",
extra={"model": model, "speed": speed, "emotion": emotion},
)

self._opts = _TTSOptions(
model=model,
Expand All @@ -141,6 +149,7 @@ def __init__(
api_key=cartesia_api_key,
base_url=base_url,
word_timestamps=word_timestamps,
api_version=api_version,
)
self._session = http_session
self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
Expand Down Expand Up @@ -170,7 +179,7 @@ def provider(self) -> str:
async def _connect_ws(self, timeout: float) -> aiohttp.ClientWebSocketResponse:
    """Open a WebSocket connection to the TTS websocket endpoint.

    The API key and the configured API version are passed as query-string
    parameters on the websocket URL.

    Args:
        timeout: Maximum number of seconds to wait for the connection to be
            established.

    Returns:
        The connected websocket response object.

    Raises:
        asyncio.TimeoutError: If the connection is not established within
            ``timeout`` seconds.
    """
    session = self._ensure_session()
    # Use the per-instance API version rather than the module-level default so
    # that the version chosen by the caller is the one actually negotiated.
    # Only a single URL literal may appear here: adjacent string literals are
    # implicitly concatenated and would yield a malformed, doubled path.
    url = self._opts.get_ws_url(
        f"/tts/websocket?api_key={self._opts.api_key}&cartesia_version={self._opts.api_version}"
    )
    return await asyncio.wait_for(session.ws_connect(url), timeout)

Expand All @@ -194,6 +203,7 @@ def update_options(
voice: NotGivenOr[str | list[float]] = NOT_GIVEN,
speed: NotGivenOr[TTSVoiceSpeed | float | None] = NOT_GIVEN,
emotion: NotGivenOr[list[TTSVoiceEmotion | str] | None] = NOT_GIVEN,
api_version: NotGivenOr[str] = NOT_GIVEN,
) -> None:
"""
Update the Text-to-Speech (TTS) configuration options.
Expand All @@ -218,13 +228,19 @@ def update_options(
self._opts.speed = cast(Optional[Union[TTSVoiceSpeed, float]], speed)
if is_given(emotion):
self._opts.emotion = emotion

if (speed or emotion) and self._opts.model != "sonic-2-2025-03-07":
logger.warning(
"speed and emotion controls are only supported for model 'sonic-2-2025-03-07', "
"see https://docs.cartesia.ai/developer-tools/changelog for details",
extra={"model": self._opts.model, "speed": speed, "emotion": emotion},
)
if is_given(api_version):
self._opts.api_version = api_version

if speed or emotion:
if (
self._opts.api_version != API_VERSION_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS
or self._opts.model != MODEL_ID_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS
):
logger.warning(
f"speed and emotion controls are only supported for model '{MODEL_ID_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS}', and API version '{API_VERSION_WITH_EMBEDDINGS_AND_EXPERIMENTAL_CONTROLS}', "
"see https://docs.cartesia.ai/developer-tools/changelog for details",
extra={"model": self._opts.model, "speed": speed, "emotion": emotion},
)

def synthesize(
self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
Expand Down