Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions examples/voice_agents/sms_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import logging
from typing import Any, override

from dotenv import load_dotenv

from livekit.agents import (
Agent,
AgentServer,
AgentSession,
JobContext,
RunContext,
TextMessageContext,
cli,
)
from livekit.agents.beta.workflows import GetEmailTask
from livekit.agents.llm import function_tool
from livekit.plugins import silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel

# uncomment to enable Krisp background voice/noise cancellation
# from livekit.plugins import noise_cancellation

# Module-level logger used by the agent and the handlers in this example.
# NOTE(review): the name "basic-agent" looks carried over from another example;
# confirm whether a name matching this file (sms_agent.py) was intended.
logger = logging.getLogger("basic-agent")

# Load variables from a local .env file (python-dotenv) into the environment.
load_dotenv()


class MyAgent(Agent):
    """Example agent ("Kelly") usable from a voice session or a text handler.

    The ``text_mode`` flag is remembered so that ``get_init_kwargs`` can report
    it and so the tools can adapt their behavior to the channel.
    """

    def __init__(self, *, text_mode: bool) -> None:
        """Create the agent.

        Args:
            text_mode: True when the agent is driven by text messages, False
                when it is driven by a voice session.
        """
        super().__init__(
            # Adjacent string literals are joined verbatim, so every sentence
            # needs its own trailing space or the prompt runs together
            # ("...via voice.with that in mind...").
            # NOTE(review): the prompt mentions voice even when text_mode is
            # True; confirm whether text mode should get its own wording.
            instructions="Your name is Kelly. You would interact with users via voice. "
            "with that in mind keep your responses concise and to the point. "
            "do not use emojis, asterisks, markdown, or other special characters in your responses. "
            "You are curious and friendly, and have a sense of humor. "
            "you will speak english to the user",
        )
        self._text_mode = text_mode

    @override
    def get_init_kwargs(self) -> dict[str, Any]:
        """Return the keyword arguments this agent was constructed with."""
        return {
            "text_mode": self._text_mode,
        }

    async def on_enter(self) -> None:
        """Greet the user when running in voice mode; do nothing in text mode."""
        if not self._text_mode:
            logger.debug("greeting the user")
            self.session.generate_reply(allow_interruptions=False)

    # all functions annotated with @function_tool will be passed to the LLM when this
    # agent is active
    # (the tool docstrings below are kept verbatim: presumably they are what the
    # LLM sees as the tool description -- confirm against function_tool docs)
    @function_tool
    async def lookup_weather(
        self, context: RunContext, location: str, latitude: str, longitude: str
    ):
        """Called when the user asks for weather related information.
        Ensure the user's location (city or region) is provided.
        When given a location, please estimate the latitude and longitude of the location and
        do not ask the user for them.

        Args:
            location: The location they are asking for
            latitude: The latitude of the location, do not ask user for it
            longitude: The longitude of the location, do not ask user for it
        """

        logger.info("Looking up weather for %s", location)

        # this will create multiple responses to the user
        context.session.say("Let me check the weather for you")

        # Canned answer: this example does not query a real weather service.
        return "sunny with a temperature of 70 degrees."

    @function_tool
    async def register_for_weather(self, context: RunContext):
        """Called when the user wants to register for the weather event."""

        # In text mode the user types the address, so the email task is told
        # not to re-confirm it repeatedly; in voice mode nothing extra is added.
        extra_instructions = (
            "You are communicating with the user via text messages, "
            "so there is no need to verify the email address with the user multiple times."
            if self._text_mode
            else ""
        )
        get_email_task = GetEmailTask(extra_instructions=extra_instructions)
        get_email_task.configure(llm="openai/gpt-4.1")

        # Awaiting the task yields its result once the email has been captured.
        email_result = await get_email_task

        # TODO: serialize durable function calls
        email_address = email_result.email_address

        logger.info("User's email address: %s", email_address)

        return "You are now registered for the weather event."


# Server instance on which the text and RTC handlers are registered via decorators.
server = AgentServer()


@server.text_handler()
async def sms_handler(ctx: TextMessageContext):
    """Handle one incoming text message with a text-only agent session.

    A session is rehydrated from ``ctx.session_data`` when present, otherwise a
    fresh session is started with ``MyAgent`` in text mode. Every event produced
    by running the session on the message text is sent back through ``ctx``.

    Args:
        ctx: Context of the incoming text message (text, stored session data,
            and the response channel).
    """
    # Local import keeps this block self-contained.
    import os

    logger.info("SMS received: %s", ctx.text)

    # Do not keep the state passphrase in source control: read it from the
    # environment (load_dotenv() has already run at import time). The previous
    # literal stays as a fallback so existing setups keep working.
    state_passphrase = os.environ.get("AGENT_STATE_PASSPHRASE", "my-secret-passphrase")

    session = AgentSession(
        llm="openai/gpt-4.1-mini",
        state_passphrase=state_passphrase,
    )
    if ctx.session_data:
        # Continue the conversation stored from a previous message.
        await session.rehydrate(ctx.session_data)
    else:
        await session.start(agent=MyAgent(text_mode=True))

    async for ev in session.run(user_input=ctx.text):
        await ctx.send_response(ev)


@server.rtc_session()
async def entrypoint(ctx: JobContext):
    """Start a voice session for the job's room with ``MyAgent`` in voice mode."""
    # Build the heavier components first, in the same order as the keyword
    # arguments they feed.
    turn_detector = MultilingualModel()
    voice_activity_detector = silero.VAD.load()

    voice_session = AgentSession(
        stt="deepgram/nova-3",
        llm="openai/gpt-4.1-mini",
        tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
        turn_detection=turn_detector,
        vad=voice_activity_detector,
        preemptive_generation=True,
    )

    voice_agent = MyAgent(text_mode=False)
    await voice_session.start(agent=voice_agent, room=ctx.room)


# When executed as a script, hand the server to the agents CLI runner.
if __name__ == "__main__":
    cli.run_app(server)
2 changes: 2 additions & 0 deletions livekit-agents/livekit/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
JobExecutorType,
JobProcess,
JobRequest,
TextMessageContext,
get_job_context,
)
from .llm.chat_context import (
Expand Down Expand Up @@ -121,6 +122,7 @@ def __getattr__(name: str) -> typing.Any:
"JobProcess",
"JobContext",
"JobRequest",
"TextMessageContext",
"get_job_context",
"JobExecutorType",
"AutoSubscribe",
Expand Down
16 changes: 15 additions & 1 deletion livekit-agents/livekit/agents/beta/workflows/address.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

from ... import llm, stt, tts, vad
from ...llm.tool_context import ToolError, ToolFlag, function_tool
Expand Down Expand Up @@ -32,6 +32,10 @@ def __init__(
tts: NotGivenOr[tts.TTS | None] = NOT_GIVEN,
allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
) -> None:
self._init_kwargs = {
"extra_instructions": extra_instructions,
"allow_interruptions": allow_interruptions,
}
super().__init__(
instructions=(
"You are only a single step in a broader system, responsible solely for capturing an address.\n"
Expand Down Expand Up @@ -77,6 +81,16 @@ def __init__(

self._address_update_speech_handle: SpeechHandle | None = None

def get_init_kwargs(self) -> dict[str, Any]:
    """Return the keyword arguments recorded in ``__init__`` for this task."""
    recorded_kwargs = self._init_kwargs
    return recorded_kwargs

def __getstate__(self) -> dict[str, Any]:
    """Return the parent's state extended with the ``current_address`` entry."""
    parent_state = super().__getstate__()
    own_state = {"current_address": self._current_address}
    return parent_state | own_state

def __setstate__(self, state: dict[str, Any]) -> None:
    """Restore the parent's state first, then the saved ``current_address``."""
    super().__setstate__(state)
    saved_address = state["current_address"]
    self._current_address = saved_address

async def on_enter(self) -> None:
    """Ask the session to generate a reply requesting the user's address."""
    prompt = "Ask the user to provide their address."
    self.session.generate_reply(instructions=prompt)

Expand Down
16 changes: 15 additions & 1 deletion livekit-agents/livekit/agents/beta/workflows/email_address.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import re
from dataclasses import dataclass
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

from ... import llm, stt, tts, vad
from ...llm.tool_context import ToolError, ToolFlag, function_tool
Expand Down Expand Up @@ -37,6 +37,10 @@ def __init__(
tts: NotGivenOr[tts.TTS | None] = NOT_GIVEN,
allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
) -> None:
self._init_kwargs = {
"extra_instructions": extra_instructions,
"allow_interruptions": allow_interruptions,
}
super().__init__(
instructions=(
"You are only a single step in a broader system, responsible solely for capturing an email address.\n"
Expand Down Expand Up @@ -78,6 +82,9 @@ def __init__(
# used to ignore the call to confirm_email_address in case the LLM is hallucinating and not asking for user confirmation
self._email_update_speech_handle: SpeechHandle | None = None

def get_init_kwargs(self) -> dict[str, Any]:
    """Return the keyword arguments recorded in ``__init__`` for this task."""
    recorded_kwargs = self._init_kwargs
    return recorded_kwargs

async def on_enter(self) -> None:
    """Ask the session to generate a reply requesting an email address."""
    prompt = "Ask the user to provide an email address."
    self.session.generate_reply(instructions=prompt)

Expand Down Expand Up @@ -128,3 +135,10 @@ async def decline_email_capture(self, reason: str) -> None:
"""
if not self.done():
self.complete(ToolError(f"couldn't get the email address: {reason}"))

def __getstate__(self) -> dict[str, Any]:
    """Return the parent's state extended with the ``current_email`` entry."""
    parent_state = super().__getstate__()
    own_state = {"current_email": self._current_email}
    return parent_state | own_state

def __setstate__(self, state: dict[str, Any]) -> None:
    """Restore the parent's state first, then the saved ``current_email``."""
    super().__setstate__(state)
    saved_email = state["current_email"]
    self._current_email = saved_email
Loading
Loading