livekit · longcw · Sep 30, 2025 · longcw · Sep 30, 2025 · theomonnom
diff --git a/livekit-agents/livekit/agents/beta/workflows/consent_collection.py b/livekit-agents/livekit/agents/beta/workflows/consent_collection.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from ... import llm, stt, tts, vad
+from ...llm.tool_context import function_tool
+from ...types import NOT_GIVEN, NotGivenOr
+from ...voice.agent import AgentTask
+
+if TYPE_CHECKING:
+    from ...voice.agent_session import TurnDetectionMode
+
+
+@dataclass
+class CollectConsentResult:  # result value that CollectConsentTask completes with
+    consent: bool  # True when set by consent_given, False when set by consent_denied
+    denied_reason: str | None  # reason passed to consent_denied; None when consent was given
+
+
+class CollectConsentTask(AgentTask[CollectConsentResult]):  # asks the user for consent and completes with a CollectConsentResult
+    def __init__(
+        self,
+        *,  # every option below is keyword-only
+        chat_ctx: NotGivenOr[llm.ChatContext] = NOT_GIVEN,
+        turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN,
+        stt: NotGivenOr[stt.STT | None] = NOT_GIVEN,
+        vad: NotGivenOr[vad.VAD | None] = NOT_GIVEN,
+        llm: NotGivenOr[llm.LLM | llm.RealtimeModel | None] = NOT_GIVEN,
+        tts: NotGivenOr[tts.TTS | None] = NOT_GIVEN,
+        allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
+        extra_instructions: NotGivenOr[str] = NOT_GIVEN,  # optional text appended after base_instructions
+    ) -> None:
+        instructions = self.base_instructions  # start from the built-in prompt defined below
+        if extra_instructions:  # presumably falsy for NOT_GIVEN as well as "" - confirm against the NOT_GIVEN sentinel
+            instructions += "\n" + extra_instructions  # extra text goes on its own line after the base prompt
+        super().__init__(  # all remaining options are forwarded to AgentTask unchanged
+            instructions=instructions,
+            chat_ctx=chat_ctx,
+            turn_detection=turn_detection,
+            stt=stt,
+            vad=vad,
+            llm=llm,
+            tts=tts,
+            allow_interruptions=allow_interruptions,
+        )
+
+    @property
+    def base_instructions(self) -> str:  # prompt text used by __init__ before extra_instructions is appended
+        return (
+            "You are responsible for collecting consent from the user. "
+            "Call `consent_given` when the user explicitly gives consent or shows interest to follow up. "
+            "Call `consent_denied` when the user denies consent. "
+            "Ignore unrelated input and avoid going off-topic. Do not generate markdown, greetings, or unnecessary commentary. \n"
+            "Always explicitly invoke a tool when applicable. Do not simulate tool usage, no real action is taken unless the tool is explicitly called."
+        )
+
+    async def on_enter(self) -> None:  # NOTE(review): looks like the hook AgentTask runs when the task starts - confirm
+        self.session.generate_reply(  # opening request for consent; the call is not awaited here
+            instructions="Ask the user to provide their consent.", tool_choice="none"
+        )
+
+    @function_tool
+    async def consent_given(self) -> None:  # tool: finish the task with consent=True
+        """Called when the user explicitly gives consent or shows interest to follow up."""
+        if not self.done():  # skip complete() when the task is already done
+            self.complete(CollectConsentResult(consent=True, denied_reason=None))
+
+    @function_tool
+    async def consent_denied(self, reason: str) -> None:  # tool: finish the task with consent=False and the given reason
+        """Called when the user denies consent.
+
+        Args:
+            reason: The reason why the user denied consent, "unknown" if not stated
+        """
+        if not self.done():  # skip complete() when the task is already done
+            self.complete(CollectConsentResult(consent=False, denied_reason=reason))
diff --git a/livekit-agents/livekit/agents/beta/workflows/email_address.py b/livekit-agents/livekit/agents/beta/workflows/email_address.py
@@ -27,38 +27,21 @@ class GetEmailResult:
 class GetEmailTask(AgentTask[GetEmailResult]):
     def __init__(
         self,
+        *,
         chat_ctx: NotGivenOr[llm.ChatContext] = NOT_GIVEN,
         turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN,
         stt: NotGivenOr[stt.STT | None] = NOT_GIVEN,
         vad: NotGivenOr[vad.VAD | None] = NOT_GIVEN,
         llm: NotGivenOr[llm.LLM | llm.RealtimeModel | None] = NOT_GIVEN,
         tts: NotGivenOr[tts.TTS | None] = NOT_GIVEN,
         allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
+        extra_instructions: NotGivenOr[str] = NOT_GIVEN,
     ) -> None:
+        instructions = self.base_instructions
+        if extra_instructions:
+            instructions += "\n" + extra_instructions
         super().__init__(
-            instructions=(
-                "You are only a single step in a broader system, responsible solely for capturing an email address.\n"
-                "Handle input as noisy voice transcription. Expect that users will say emails aloud with formats like:\n"
-                "- 'john dot doe at gmail dot com'\n"
-                "- 'susan underscore smith at yahoo dot co dot uk'\n"
-                "- 'dave dash b at protonmail dot com'\n"
-                "- 'jane at example' (partial—prompt for the domain)\n"
-                "- 'theo t h e o at livekit dot io' (name followed by spelling)\n"
-                "Normalize common spoken patterns silently:\n"
-                "- Convert words like 'dot', 'underscore', 'dash', 'plus' into symbols: `.`, `_`, `-`, `+`.\n"
-                "- Convert 'at' to `@`.\n"
-                "- Recognize patterns where users speak their name or a word, followed by spelling: e.g., 'john j o h n'.\n"
-                "- Filter out filler words or hesitations.\n"
-                "- Assume some spelling if contextually obvious (e.g. 'mike b two two' → mikeb22).\n"
-                "Don't mention corrections. Treat inputs as possibly imperfect but fix them silently.\n"
-                "Call `update_email_address` at the first opportunity whenever you form a new hypothesis about the email. "
-                "(before asking any questions or providing any answers.) \n"
-                "Don't invent new email addresses, stick strictly to what the user said. \n"
-                "Call `confirm_email_address` after the user confirmed the email address is correct. \n"
-                "If the email is unclear or invalid, or it takes too much back-and-forth, prompt for it in parts: first the part before the '@', then the domain—only if needed. \n"
-                "Ignore unrelated input and avoid going off-topic. Do not generate markdown, greetings, or unnecessary commentary. \n"
-                "Always explicitly invoke a tool when applicable. Do not simulate tool usage, no real action is taken unless the tool is explicitly called."
-            ),
+            instructions=instructions,
             chat_ctx=chat_ctx,
             turn_detection=turn_detection,
             stt=stt,
@@ -74,6 +57,32 @@ def __init__(
         # used to ignore the call to confirm_email_address in case the LLM is hallucinating and not asking for user confirmation
         self._email_update_speech_handle: SpeechHandle | None = None
 
+    @property
+    def base_instructions(self) -> str:  # prompt text __init__ passes as `instructions`, before any extra_instructions is appended
+        return (
+            "You are only a single step in a broader system, responsible solely for capturing an email address.\n"
+            "Handle input as noisy voice transcription. Expect that users will say emails aloud with formats like:\n"
+            "- 'john dot doe at gmail dot com'\n"
+            "- 'susan underscore smith at yahoo dot co dot uk'\n"
+            "- 'dave dash b at protonmail dot com'\n"
+            "- 'jane at example' (partial—prompt for the domain)\n"
+            "- 'theo t h e o at livekit dot io' (name followed by spelling)\n"
+            "Normalize common spoken patterns silently:\n"
+            "- Convert words like 'dot', 'underscore', 'dash', 'plus' into symbols: `.`, `_`, `-`, `+`.\n"
+            "- Convert 'at' to `@`.\n"
+            "- Recognize patterns where users speak their name or a word, followed by spelling: e.g., 'john j o h n'.\n"
+            "- Filter out filler words or hesitations.\n"
+            "- Assume some spelling if contextually obvious (e.g. 'mike b two two' → mikeb22).\n"
+            "Don't mention corrections. Treat inputs as possibly imperfect but fix them silently.\n"
+            "Call `update_email_address` at the first opportunity whenever you form a new hypothesis about the email. "
+            "(before asking any questions or providing any answers.) \n"
+            "Don't invent new email addresses, stick strictly to what the user said. \n"
+            "Call `confirm_email_address` after the user confirmed the email address is correct. \n"
+            "If the email is unclear or invalid, or it takes too much back-and-forth, prompt for it in parts: first the part before the '@', then the domain—only if needed. \n"
+            "Ignore unrelated input and avoid going off-topic. Do not generate markdown, greetings, or unnecessary commentary. \n"
+            "Always explicitly invoke a tool when applicable. Do not simulate tool usage, no real action is taken unless the tool is explicitly called."
+        )
+
     async def on_enter(self) -> None:
         self.session.generate_reply(
             instructions=(