-
Notifications
You must be signed in to change notification settings - Fork 1.6k
draft for workflow design #3537
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,77 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from dataclasses import dataclass | ||
| from typing import TYPE_CHECKING | ||
|
|
||
| from ... import llm, stt, tts, vad | ||
| from ...llm.tool_context import function_tool | ||
| from ...types import NOT_GIVEN, NotGivenOr | ||
| from ...voice.agent import AgentTask | ||
|
|
||
| if TYPE_CHECKING: | ||
| from ...voice.agent_session import TurnDetectionMode | ||
|
|
||
|
|
||
@dataclass
class CollectConsentResult:
    """Outcome of a consent-collection task.

    ``denied_reason`` defaults to ``None`` so a positive result can be built
    with just ``CollectConsentResult(consent=True)``; passing it explicitly
    (positionally or by keyword) keeps working as before.
    """

    # True when the user gave consent, False when the user denied it.
    consent: bool
    # Reason supplied with a denial; None when consent was given.
    denied_reason: str | None = None
|
|
||
|
|
||
| class CollectConsentTask(AgentTask[CollectConsentResult]): | ||
def __init__(
    self,
    *,
    chat_ctx: NotGivenOr[llm.ChatContext] = NOT_GIVEN,
    turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN,
    stt: NotGivenOr[stt.STT | None] = NOT_GIVEN,
    vad: NotGivenOr[vad.VAD | None] = NOT_GIVEN,
    llm: NotGivenOr[llm.LLM | llm.RealtimeModel | None] = NOT_GIVEN,
    tts: NotGivenOr[tts.TTS | None] = NOT_GIVEN,
    allow_interruptions: NotGivenOr[bool] = NOT_GIVEN,
    extra_instructions: NotGivenOr[str] = NOT_GIVEN,
) -> None:
    """Build the task prompt and hand every other option to the parent class.

    Args:
        chat_ctx, turn_detection, stt, vad, llm, tts, allow_interruptions:
            Forwarded unchanged to the parent ``__init__``.
        extra_instructions: When truthy, appended to ``base_instructions``
            after a newline; otherwise the base text is used as-is.
    """
    # The prompt is resolved before the parent initializer runs because it is
    # one of the arguments the parent receives.
    prompt = self.base_instructions
    if extra_instructions:
        prompt = prompt + "\n" + extra_instructions

    # Everything except the prompt is a straight pass-through.
    passthrough = dict(
        chat_ctx=chat_ctx,
        turn_detection=turn_detection,
        stt=stt,
        vad=vad,
        llm=llm,
        tts=tts,
        allow_interruptions=allow_interruptions,
    )
    super().__init__(instructions=prompt, **passthrough)
|
|
||
| @property | ||
| def base_instructions(self) -> str: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we can override
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. the original |
||
| return ( | ||
| "You are responsible for collecting consent from the user. " | ||
| "Call `consent_given` when the user explicitly gives consent or shows interest to follow up. " | ||
| "Call `consent_denied` when the user denies consent. " | ||
| "Ignore unrelated input and avoid going off-topic. Do not generate markdown, greetings, or unnecessary commentary. \n" | ||
| "Always explicitly invoke a tool when applicable. Do not simulate tool usage, no real action is taken unless the tool is explicitly called." | ||
| ) | ||
|
|
||
async def on_enter(self) -> None:
    """Request an opening reply that asks the user for their consent.

    The reply is requested with ``tool_choice="none"``.
    """
    opening_prompt = "Ask the user to provide their consent."
    self.session.generate_reply(instructions=opening_prompt, tool_choice="none")
|
|
||
@function_tool
async def consent_given(self) -> None:
    """Called when the user explicitly gives consent or shows interest to follow up."""
    # A task that is already done must not be completed a second time.
    if self.done():
        return
    outcome = CollectConsentResult(consent=True, denied_reason=None)
    self.complete(outcome)
|
|
||
@function_tool
async def consent_denied(self, reason: str) -> None:
    """Called when the user denies consent.
    Args:
        reason: The reason why the user denied consent, "unknown" if not stated
    """
    # A task that is already done must not be completed a second time.
    if self.done():
        return
    outcome = CollectConsentResult(consent=False, denied_reason=reason)
    self.complete(outcome)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,38 +27,21 @@ class GetEmailResult: | |
| class GetEmailTask(AgentTask[GetEmailResult]): | ||
| def __init__( | ||
| self, | ||
| *, | ||
| chat_ctx: NotGivenOr[llm.ChatContext] = NOT_GIVEN, | ||
| turn_detection: NotGivenOr[TurnDetectionMode | None] = NOT_GIVEN, | ||
| stt: NotGivenOr[stt.STT | None] = NOT_GIVEN, | ||
| vad: NotGivenOr[vad.VAD | None] = NOT_GIVEN, | ||
| llm: NotGivenOr[llm.LLM | llm.RealtimeModel | None] = NOT_GIVEN, | ||
| tts: NotGivenOr[tts.TTS | None] = NOT_GIVEN, | ||
| allow_interruptions: NotGivenOr[bool] = NOT_GIVEN, | ||
| extra_instructions: NotGivenOr[str] = NOT_GIVEN, | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For these built-in workflows, I think we should allow users to add extra instructions or even replace the instructions completely. In real production use, users may want to add many other prompts, such as context, tone, fallback handling, and examples… |
||
| ) -> None: | ||
| instructions = self.base_instructions | ||
| if extra_instructions: | ||
| instructions += "\n" + extra_instructions | ||
| super().__init__( | ||
| instructions=( | ||
| "You are only a single step in a broader system, responsible solely for capturing an email address.\n" | ||
| "Handle input as noisy voice transcription. Expect that users will say emails aloud with formats like:\n" | ||
| "- 'john dot doe at gmail dot com'\n" | ||
| "- 'susan underscore smith at yahoo dot co dot uk'\n" | ||
| "- 'dave dash b at protonmail dot com'\n" | ||
| "- 'jane at example' (partial—prompt for the domain)\n" | ||
| "- 'theo t h e o at livekit dot io' (name followed by spelling)\n" | ||
| "Normalize common spoken patterns silently:\n" | ||
| "- Convert words like 'dot', 'underscore', 'dash', 'plus' into symbols: `.`, `_`, `-`, `+`.\n" | ||
| "- Convert 'at' to `@`.\n" | ||
| "- Recognize patterns where users speak their name or a word, followed by spelling: e.g., 'john j o h n'.\n" | ||
| "- Filter out filler words or hesitations.\n" | ||
| "- Assume some spelling if contextually obvious (e.g. 'mike b two two' → mikeb22).\n" | ||
| "Don't mention corrections. Treat inputs as possibly imperfect but fix them silently.\n" | ||
| "Call `update_email_address` at the first opportunity whenever you form a new hypothesis about the email. " | ||
| "(before asking any questions or providing any answers.) \n" | ||
| "Don't invent new email addresses, stick strictly to what the user said. \n" | ||
| "Call `confirm_email_address` after the user confirmed the email address is correct. \n" | ||
| "If the email is unclear or invalid, or it takes too much back-and-forth, prompt for it in parts: first the part before the '@', then the domain—only if needed. \n" | ||
| "Ignore unrelated input and avoid going off-topic. Do not generate markdown, greetings, or unnecessary commentary. \n" | ||
| "Always explicitly invoke a tool when applicable. Do not simulate tool usage, no real action is taken unless the tool is explicitly called." | ||
| ), | ||
| instructions=instructions, | ||
| chat_ctx=chat_ctx, | ||
| turn_detection=turn_detection, | ||
| stt=stt, | ||
|
|
@@ -74,6 +57,32 @@ def __init__( | |
| # used to ignore the call to confirm_email_address in case the LLM is hallucinating and not asking for user confirmation | ||
| self._email_update_speech_handle: SpeechHandle | None = None | ||
|
|
||
| @property | ||
| def base_instructions(self) -> str: | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. besides adding a |
||
| return ( | ||
| "You are only a single step in a broader system, responsible solely for capturing an email address.\n" | ||
| "Handle input as noisy voice transcription. Expect that users will say emails aloud with formats like:\n" | ||
| "- 'john dot doe at gmail dot com'\n" | ||
| "- 'susan underscore smith at yahoo dot co dot uk'\n" | ||
| "- 'dave dash b at protonmail dot com'\n" | ||
| "- 'jane at example' (partial—prompt for the domain)\n" | ||
| "- 'theo t h e o at livekit dot io' (name followed by spelling)\n" | ||
| "Normalize common spoken patterns silently:\n" | ||
| "- Convert words like 'dot', 'underscore', 'dash', 'plus' into symbols: `.`, `_`, `-`, `+`.\n" | ||
| "- Convert 'at' to `@`.\n" | ||
| "- Recognize patterns where users speak their name or a word, followed by spelling: e.g., 'john j o h n'.\n" | ||
| "- Filter out filler words or hesitations.\n" | ||
| "- Assume some spelling if contextually obvious (e.g. 'mike b two two' → mikeb22).\n" | ||
| "Don't mention corrections. Treat inputs as possibly imperfect but fix them silently.\n" | ||
| "Call `update_email_address` at the first opportunity whenever you form a new hypothesis about the email. " | ||
| "(before asking any questions or providing any answers.) \n" | ||
| "Don't invent new email addresses, stick strictly to what the user said. \n" | ||
| "Call `confirm_email_address` after the user confirmed the email address is correct. \n" | ||
| "If the email is unclear or invalid, or it takes too much back-and-forth, prompt for it in parts: first the part before the '@', then the domain—only if needed. \n" | ||
| "Ignore unrelated input and avoid going off-topic. Do not generate markdown, greetings, or unnecessary commentary. \n" | ||
| "Always explicitly invoke a tool when applicable. Do not simulate tool usage, no real action is taken unless the tool is explicitly called." | ||
| ) | ||
|
|
||
| async def on_enter(self) -> None: | ||
| self.session.generate_reply( | ||
| instructions=( | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CollectConsent might be another common task (e.g. #3089); it will require more customization of the instructions in order to add context.