diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 0482866c..fde81192 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -3,29 +3,22 @@ { "name": "astra", "image": "ghcr.io/rte-design/astra_agents_build", - "customizations": { "vscode": { - "extensions": ["golang.go"] + "extensions": [ + "golang.go" + ] } }, - - "workspaceMount": "source=${localWorkspaceFolder},target=/app,type=bind", + "workspaceMount": "source=${localWorkspaceFolder},target=/app,type=bind", "workspaceFolder": "/app", - // Use 'forwardPorts' to make a list of ports inside the container available locally. - "forwardPorts": [8080], - + "forwardPorts": [ + 8080, + 49483 + ], // Features to add to the dev container. More info: https://containers.dev/features. "features": { "ghcr.io/devcontainers/features/git:1": {} } - - // Uncomment the next line to run commands after the container is created. - // "postCreateCommand": "" - - // Configure tool-specific properties. - // "customizations": {}, - // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root. - // "remoteUser": "devcontainer" } \ No newline at end of file diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..29f50432 --- /dev/null +++ b/.env.example @@ -0,0 +1,67 @@ +# ------------------------------ +# Environment Variables for server & worker +# ------------------------------ + +# ------------------------------ +# Server Configuration +# ------------------------------ + +# Log path +LOG_PATH=/tmp +# Graph designer server port +GRAPH_DESIGNER_SERVER_PORT=49483 +# Server port +SERVER_PORT=8080 +# Maximum number of workers +WORKERS_MAX=100 +# Worker quit timeout in seconds +WORKER_QUIT_TIMEOUT_SECONDES=60 + +# Agora App ID and Agora App Certificate +# required: this variable must be set +AGORA_APP_ID= +AGORA_APP_CERTIFICATE= + +# ------------------------------ +# Worker Configuration +# ------------------------------ + +# Extension: bedrock_llm +# Extension: polly_tts +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_BEDROCK_MODEL= +AWS_REGION= + +# Extension: agora_rtc +# Azure STT key and region +AZURE_STT_KEY= +AZURE_STT_REGION= + +# Extension: azure_tts +# Azure TTS key and region +AZURE_TTS_KEY= +AZURE_TTS_REGION= + +# Extension: cosy_tts +# Cosy TTS key +COSY_TTS_KEY= + +# Extension: elevenlabs_tts +# ElevenLabs TTS key +ELEVENLABS_TTS_KEY= + +# Extension: openai_chatgpt +# OpenAI API key +OPENAI_API_KEY= +# OpenAI base URL +# if using OpenAI, keep default. 
using other OpenAI-compatible providers, then set it to the other provider's address +OPENAI_BASE_URL= +# OpenAI Model +OPENAI_MODEL=gpt-4o-mini +# OpenAI proxy URL +OPENAI_PROXY_URL= + +# Extension: qwen_llm +# Qwen API key +QWEN_API_KEY= diff --git a/.gitignore b/.gitignore index 6217b5c0..8a1ca581 100644 --- a/.gitignore +++ b/.gitignore @@ -13,7 +13,6 @@ bin/ core crash_context_v1 .deps/ -docker-compose.yml .DS_Store .env /.gn @@ -22,11 +21,6 @@ include/ interface/ lib/ lib64 -agents/manifest.json -agents/manifest.elevenlabs.json -agents/manifest.cn.json -agents/manifest.en.json -!agents/addon/manifest.json node_modules/ /out/ *.pcm @@ -37,4 +31,4 @@ xdump_config speechsdk/ SpeechSDK-Linux.tar.gz pyvenv.cfg -xdump_config \ No newline at end of file +xdump_config diff --git a/Dockerfile b/Dockerfile index 621b9f6d..a383ca16 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,14 @@ -FROM ghcr.io/rte-design/astra_agents_build:0.3.3 AS builder +FROM ghcr.io/rte-design/astra_agents_build:0.3.5 AS builder ARG SESSION_CONTROL_CONF=session_control.conf WORKDIR /app COPY . . -COPY agents/manifest.json.example agents/manifest.json -COPY agents/manifest.json.elevenlabs.example agents/manifest.elevenlabs.json +COPY agents/property.json.example agents/property.json COPY agents/${SESSION_CONTROL_CONF} agents/session_control.conf -RUN make build && \ +RUN make clean && make build && \ cd agents && ./scripts/package.sh FROM ubuntu:22.04 diff --git a/Makefile b/Makefile index 9bbee36d..1fe1dba3 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ PROJECT_NAME := astra PROJECT_VERSION ?= "0.1."$(shell date -u +'%Y%m%d%H') REGISTRY ?= agoraio/ -.PHONY: build build-agents build-playground build-server clean clean-agents docker-build-playground docker-build-server run-server +.PHONY: build build-agents build-playground build-server clean clean-agents docker-build-playground docker-build-server run-gd-server run-server build: build-agents build-server @@ -25,7 +25,7 @@ clean: clean-agents clean-agents: @echo ">> clean agents" - rm -rf agents/manifest.json agents/bin agents/out agents/interface agents/include agents/lib agents/lib64 agents/addon/system agents/addon/extension_group agents/.release + rm -rf agents/bin agents/out agents/interface agents/include agents/lib agents/lib64 agents/addon/system agents/addon/extension_group agents/.release @echo ">> done" docker-build-playground: @@ -38,6 +38,11 @@ docker-build-server: docker build -t $(REGISTRY)$(PROJECT_NAME)_agents_server:$(PROJECT_VERSION) --platform linux/amd64 -f Dockerfile . 
@echo ">> done" +run-gd-server: + @echo ">> run graph designer server" + cd agents && arpm dev-server + @echo ">> done" + run-server: @echo ">> run server" server/bin/api diff --git a/README.md b/README.md index 6a1b64bc..7acfc1c9 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ We showcase an impressive voice agent called Astra, powered by TEN, demonstratin - Agora App ID and App Certificate([read here on how](https://docs.agora.io/en/video-calling/get-started/manage-agora-account?platform=web)) - Azure's [speech-to-text](https://azure.microsoft.com/en-us/products/ai-services/speech-to-text) and [text-to-speech](https://azure.microsoft.com/en-us/products/ai-services/text-to-speech) API keys - [OpenAI](https://openai.com/index/openai-api/) API key -- [Docker](https://www.docker.com/) +- [Docker](https://www.docker.com/) / [Docker Compose](https://docs.docker.com/compose/) - [Node.js(LTS) v18](https://nodejs.org/en) #### Docker setting on apple silicon @@ -58,85 +58,59 @@ You will need to uncheck "Use Rosetta for x86_64/amd64 emulation on apple silico -#### 1. Create manifest.json +#### 1. Prepare config files ```bash -# Create manifest.json from the example -cp ./agents/manifest.json.example ./agents/manifest.json +# Create property.json from the example +cp ./agents/property.json.example ./agents/property.json +# Create .env from the example +cp ./.env.example ./.env ``` -#### 2. Modify prompt and greeting - -```js -// Feel free to edit prompt and greeting in manifest.json -"property": { - "base_url": "", - "api_key": "", - "frequency_penalty": 0.9, - "model": "gpt-3.5-turbo", - "max_tokens": 512, - "prompt": "", // prompt - "proxy_url": "", - "greeting": "Astra agent connected. How can I help you today?", // greeting - "max_memory_length": 10 -} +#### 2. Setup API keys & Environment varialbes in .env file ``` +... +# Agora App ID and Agora App Certificate +# required: this variable must be set +AGORA_APP_ID= +AGORA_APP_CERTIFICATE= +... +# Extension: agora_rtc +# Azure STT key and region +AZURE_STT_KEY= +AZURE_STT_REGION= -#### 3. Create agent in Docker container - -```bash -# In CLI, pull Docker image and mount the target directory -docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build - -# Windows Git Bash -# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build - -# Enter container -docker exec -it astra_agents_dev bash - -# Create agent -make build +# Extension: azure_tts +# Azure TTS key and region +AZURE_TTS_KEY= +AZURE_TTS_REGION= +... +# Extension: openai_chatgpt +# OpenAI API key +OPENAI_API_KEY= ``` -#### 4. Export env variables and start server - +#### 3. Start agent builder toolkit containers ```bash -# In the same CLI window, set env variables -export AGORA_APP_ID= -export AGORA_APP_CERTIFICATE= - -# OpenAI API key -export OPENAI_API_KEY= - -# Azure STT key and region -export AZURE_STT_KEY= -export AZURE_STT_REGION= - -# Azure TTS key and region -export AZURE_TTS_KEY= -export AZURE_TTS_REGION= - -# Run server on port 8080 -make run-server +# Execute docker compose up to start the services +docker compose up ``` -#### 5. Connect voice agent UI to server - -Open a separate Terminal tab and run the commands: +#### 4. 
Build your agent and start server ```bash -# Create a .env file from example -cd playground -cp .env.example .env +# Enter container to build agent +docker exec -it astra_agents_dev bash +make build -# Install dependencies and start dev environment in localhost:3000 -npm install && npm run dev +# Run server on port 8080 +make run-server ``` -#### 6. Verify your customized voice agent 🎉 +#### 5. Verify your voice agent 🎉 -Open `localhost:3000` in your browser, you should be seeing a voice agent just like the Astra, yet with your own customizations. +You can open `localhost:3000` in your browser to test your own agent, or open `localhost:3001` in your browser to build your workflow by Graph Designer.

## Voice agent architecture

@@ -192,4 +166,4 @@ Contributions are welcome! Please read the [contribution guidelines](CONTRIBUTIN

## License

-This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details. +This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details. \ No newline at end of file diff --git a/agents/.gitignore b/agents/.gitignore index 46de7887..acb3acf1 100644 --- a/agents/.gitignore +++ b/agents/.gitignore @@ -26,3 +26,4 @@ session_control.conf.agora xdump_config .vscode *.pyc +property.json diff --git a/agents/addon/extension/azure_tts/manifest.json b/agents/addon/extension/azure_tts/manifest.json index 5064033b..979a8743 100644 --- a/agents/addon/extension/azure_tts/manifest.json +++ b/agents/addon/extension/azure_tts/manifest.json @@ -1,19 +1,19 @@ { "type": "extension", "name": "azure_tts", - "version": "0.2.0", + "version": "0.4.0", "language": "cpp", "dependencies": [ { "type": "system", "name": "rte_runtime", - "version": "0.3.1" + "version": "0.4.0" }, { "type": "system", "name": "azure_speech_sdk", "version": "1.38.0" - } + } ], "api": { "property": { diff --git a/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py index e8dd6472..78f3e55b 100644 --- a/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py +++ b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py @@ -70,78 +70,28 @@ class BedrockLLMExtension(Extension): outdate_ts = 0 bedrock_llm = None - def on_init( - self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo - ) -> None: - logger.info("BedrockLLMExtension on_init") - rte.on_init_done(manifest, property) - def on_start(self, rte: RteEnv) -> None: logger.info("BedrockLLMExtension on_start") # Prepare configuration bedrock_llm_config = BedrockLLMConfig.default_config() - try: - region = rte.get_property_string(PROPERTY_REGION) - if region: - bedrock_llm_config.region = region - except Exception as err: - logger.debug( - f"GetProperty optional {PROPERTY_REGION} failed, err: {err}. Using default value: {bedrock_llm_config.region}" - ) - return - - try: - access_key = rte.get_property_string(PROPERTY_ACCESS_KEY) - bedrock_llm_config.access_key = access_key - except Exception as err: - logger.error( - f"GetProperty optional {PROPERTY_ACCESS_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.access_key}" - ) - return - - try: - secret_key = rte.get_property_string(PROPERTY_SECRET_KEY) - bedrock_llm_config.secret_key = secret_key - except Exception as err: - logger.error( - f"GetProperty optional {PROPERTY_SECRET_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.secret_key}" - ) - return - - try: - model = rte.get_property_string(PROPERTY_MODEL) - if model: - bedrock_llm_config.model = model - except Exception as err: - logger.debug( - f"GetProperty optional {PROPERTY_MODEL} error: {err}. Using default value: {bedrock_llm_config.model}" - ) - - try: - prompt = rte.get_property_string(PROPERTY_PROMPT) - if prompt: - bedrock_llm_config.prompt = prompt - except Exception as err: - logger.debug( - f"GetProperty optional {PROPERTY_PROMPT} error: {err}. Using default value: {bedrock_llm_config.prompt}" - ) - - try: - temperature = rte.get_property_float(PROPERTY_TEMPERATURE) - bedrock_llm_config.temperature = float(temperature) - except Exception as err: - logger.debug( - f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}. 
Using default value: {bedrock_llm_config.temperature}" - ) + for optional_str_param in [ + PROPERTY_REGION, PROPERTY_ACCESS_KEY, PROPERTY_SECRET_KEY, + PROPERTY_MODEL, PROPERTY_PROMPT]: + try: + value = rte.get_property_string(optional_str_param).strip() + if value: + bedrock_llm_config.__setattr__(optional_str_param, value) + except Exception as err: + logger.debug(f"GetProperty optional {optional_str_param} failed, err: {err}. Using default value: {bedrock_llm_config.__getattribute__(optional_str_param)}") - try: - top_p = rte.get_property_float(PROPERTY_TOP_P) - bedrock_llm_config.top_p = float(top_p) - except Exception as err: - logger.debug( - f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}. Using default value: {bedrock_llm_config.top_p}" - ) + for optional_float_param in [PROPERTY_TEMPERATURE, PROPERTY_TOP_P]: + try: + value = rte.get_property_float(optional_float_param) + if value: + bedrock_llm_config.__setattr__(optional_float_param, value) + except Exception as err: + logger.debug(f"GetProperty optional {optional_float_param} failed, err: {err}. Using default value: {bedrock_llm_config.__getattribute__(optional_float_param)}") try: max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS) @@ -175,7 +125,7 @@ def on_start(self, rte: RteEnv) -> None: f"newBedrockLLM succeed with max_tokens: {bedrock_llm_config.max_tokens}, model: {bedrock_llm_config.model}" ) except Exception as err: - logger.info(f"newBedrockLLM failed, err: {err}") + logger.exception(f"newBedrockLLM failed, err: {err}") # Send greeting if available if greeting: @@ -197,10 +147,6 @@ def on_stop(self, rte: RteEnv) -> None: logger.info("BedrockLLMExtension on_stop") rte.on_stop_done() - def on_deinit(self, rte: RteEnv) -> None: - logger.info("BedrockLLMExtension on_deinit") - rte.on_deinit_done() - def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("BedrockLLMExtension on_cmd") cmd_json = cmd.to_json() @@ -411,16 +357,6 @@ def converse_stream_worker(start_time, input_text, memory): @register_addon_as_extension("bedrock_llm_python") class BedrockLLMExtensionAddon(Addon): - def on_init(self, rte: RteEnv, manifest, property) -> None: - logger.info("BedrockLLMExtensionAddon on_init") - rte.on_init_done(manifest, property) - return - def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") rte.on_create_instance_done(BedrockLLMExtension(addon_name), context) - - def on_deinit(self, rte: RteEnv) -> None: - logger.info("BedrockLLMExtensionAddon on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/bedrock_llm_python/manifest.json b/agents/addon/extension/bedrock_llm_python/manifest.json index 7f912581..3613083f 100644 --- a/agents/addon/extension/bedrock_llm_python/manifest.json +++ b/agents/addon/extension/bedrock_llm_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/bedrock_llm_python/requirements.txt b/agents/addon/extension/bedrock_llm_python/requirements.txt index 0a92c2be..f9f40f55 100644 --- a/agents/addon/extension/bedrock_llm_python/requirements.txt +++ b/agents/addon/extension/bedrock_llm_python/requirements.txt @@ -1 +1,2 @@ +pillow==10.4.0 boto3==1.34.143 \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber/manifest.json b/agents/addon/extension/chat_transcriber/manifest.json index 8d3470fd..aaf7ad82 100644 --- 
a/agents/addon/extension/chat_transcriber/manifest.json +++ b/agents/addon/extension/chat_transcriber/manifest.json @@ -5,9 +5,9 @@ "language": "go", "dependencies": [ { - "type": "system", - "name": "rte_runtime_go", - "version": "0.3.1" + "type": "system", + "name": "rte_runtime_go", + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py index 9e141265..56c04e92 100644 --- a/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py @@ -8,11 +8,6 @@ @register_addon_as_extension("chat_transcriber_python") class ChatTranscriberExtensionAddon(Addon): - def on_init(self, rte: RteEnv, manifest, property) -> None: - logger.info("on_init") - rte.on_init_done(manifest, property) - return - def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") @@ -20,7 +15,3 @@ def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: rte.on_create_instance_done(ChatTranscriberExtension(addon_name), context) - def on_deinit(self, rte: RteEnv) -> None: - logger.info("on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py index de8a3dc6..5aa3c02c 100644 --- a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py @@ -32,12 +32,6 @@ class ChatTranscriberExtension(Extension): - def on_init( - self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo - ) -> None: - logger.info("on_init") - rte.on_init_done(manifest, property) - def on_start(self, rte: RteEnv) -> None: logger.info("on_start") rte.on_start_done() @@ -46,10 +40,6 @@ def on_stop(self, rte: RteEnv) -> None: logger.info("on_stop") rte.on_stop_done() - def on_deinit(self, rte: RteEnv) -> None: - logger.info("on_deinit") - rte.on_deinit_done() - def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("on_cmd") cmd_json = cmd.to_json() diff --git a/agents/addon/extension/chat_transcriber_python/manifest.json b/agents/addon/extension/chat_transcriber_python/manifest.json index f64295b6..d380f117 100644 --- a/agents/addon/extension/chat_transcriber_python/manifest.json +++ b/agents/addon/extension/chat_transcriber_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/cosy_tts/cosy_tts_addon.py b/agents/addon/extension/cosy_tts/cosy_tts_addon.py index b9135b3e..4c4a05b5 100644 --- a/agents/addon/extension/cosy_tts/cosy_tts_addon.py +++ b/agents/addon/extension/cosy_tts/cosy_tts_addon.py @@ -8,19 +8,9 @@ @register_addon_as_extension("cosy_tts") class CosyTTSExtensionAddon(Addon): - def on_init(self, rte: RteEnv, manifest, property) -> None: - logger.info("CosyTTSExtensionAddon on_init") - rte.on_init_done(manifest, property) - return - def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") from .cosy_tts_extension import CosyTTSExtension rte.on_create_instance_done(CosyTTSExtension(addon_name), context) - - def on_deinit(self, rte: RteEnv) -> None: - logger.info("CosyTTSExtensionAddon on_deinit") - 
rte.on_deinit_done() - return diff --git a/agents/addon/extension/cosy_tts/cosy_tts_extension.py b/agents/addon/extension/cosy_tts/cosy_tts_extension.py index 894bdd16..368d10ea 100644 --- a/agents/addon/extension/cosy_tts/cosy_tts_extension.py +++ b/agents/addon/extension/cosy_tts/cosy_tts_extension.py @@ -11,7 +11,7 @@ RteEnv, Cmd, PcmFrame, - RTE_PCM_FRAME_DATA_FMT, + PcmFrameDataFmt, Data, StatusCode, CmdResult, @@ -61,7 +61,7 @@ def get_frame(self, data: bytes) -> PcmFrame: f.set_bytes_per_sample(2) f.set_number_of_channels(1) # f.set_timestamp = 0 - f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_INTERLEAVE) + f.set_data_fmt(PcmFrameDataFmt.INTERLEAVE) f.set_samples_per_channel(self.sample_rate // 100) f.alloc_buf(self.frame_size) buff = f.lock_buf() @@ -116,10 +116,6 @@ def __init__(self, name: str): self.queue = queue.Queue() self.mutex = threading.Lock() - def on_init(self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo) -> None: - logger.info("CosyTTSExtension on_init") - rte.on_init_done(manifest, property) - def on_start(self, rte: RteEnv) -> None: logger.info("CosyTTSExtension on_start") self.api_key = rte.get_property_string("api_key") @@ -160,10 +156,6 @@ def on_stop(self, rte: RteEnv) -> None: self.thread.join() rte.on_stop_done() - def on_deinit(self, rte: RteEnv) -> None: - logger.info("CosyTTSExtension on_deinit") - rte.on_deinit_done() - def need_interrupt(self, ts: datetime.time) -> bool: return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1 diff --git a/agents/addon/extension/cosy_tts/manifest.json b/agents/addon/extension/cosy_tts/manifest.json index 8690db03..82a29cc0 100644 --- a/agents/addon/extension/cosy_tts/manifest.json +++ b/agents/addon/extension/cosy_tts/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go index 4ca3b9de..16e64321 100644 --- a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go +++ b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go @@ -272,7 +272,7 @@ func (e *elevenlabsTTSExtension) OnCmd( cmdName, err := cmd.GetName() if err != nil { slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) - cmdResult, _ := rte.NewCmdResult(rte.Error) + cmdResult, _ := rte.NewCmdResult(rte.StatusCodeError) rteEnv.ReturnResult(cmdResult, cmd) return } @@ -287,14 +287,14 @@ func (e *elevenlabsTTSExtension) OnCmd( outCmd, err := rte.NewCmd(cmdOutFlush) if err != nil { slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) - cmdResult, _ := rte.NewCmdResult(rte.Error) + cmdResult, _ := rte.NewCmdResult(rte.StatusCodeError) rteEnv.ReturnResult(cmdResult, cmd) return } if err := rteEnv.SendCmd(outCmd, nil); err != nil { slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) - cmdResult, _ := rte.NewCmdResult(rte.Error) + cmdResult, _ := rte.NewCmdResult(rte.StatusCodeError) rteEnv.ReturnResult(cmdResult, cmd) return } else { @@ -302,7 +302,7 @@ func (e *elevenlabsTTSExtension) OnCmd( } } - cmdResult, _ := rte.NewCmdResult(rte.Ok) + cmdResult, _ := rte.NewCmdResult(rte.StatusCodeOk) rteEnv.ReturnResult(cmdResult, cmd) } diff --git a/agents/addon/extension/elevenlabs_tts/manifest.json b/agents/addon/extension/elevenlabs_tts/manifest.json index 063c2e84..40c256ea 100644 --- 
a/agents/addon/extension/elevenlabs_tts/manifest.json +++ b/agents/addon/extension/elevenlabs_tts/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_go", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py index 90d2bd4a..896e04d9 100644 --- a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py @@ -16,18 +16,8 @@ @register_addon_as_extension("elevenlabs_tts_python") class ElevenlabsTTSExtensionAddon(Addon): - def on_init(self, rte: RteEnv, manifest, property) -> None: - logger.info("on_init") - rte.on_init_done(manifest, property) - return - def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") from .elevenlabs_tts_extension import ElevenlabsTTSExtension rte.on_create_instance_done(ElevenlabsTTSExtension(addon_name), context) - - def on_deinit(self, rte: RteEnv) -> None: - logger.info("on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py index 803ed5c2..ede19d97 100644 --- a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py @@ -47,10 +47,8 @@ def __init__(self, text: str, received_ts: int) -> None: class ElevenlabsTTSExtension(Extension): - def on_init( - self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo - ) -> None: - logger.info("on_init") + def on_start(self, rte: RteEnv) -> None: + logger.info("on_start") self.elevenlabs_tts = None self.outdate_ts = 0 @@ -58,11 +56,6 @@ def on_init( self.pcm_frame_size = 0 self.text_queue = queue.Queue(maxsize=1024) - rte.on_init_done(manifest, property) - - def on_start(self, rte: RteEnv) -> None: - logger.info("on_start") - # prepare configuration elevenlabs_tts_config = default_elevenlabs_tts_config() @@ -156,10 +149,6 @@ def on_stop(self, rte: RteEnv) -> None: logger.info("on_stop") rte.on_stop_done() - def on_deinit(self, rte: RteEnv) -> None: - logger.info("on_deinit") - rte.on_deinit_done() - def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: """ on_cmd receives cmd from rte graph. 
diff --git a/agents/addon/extension/elevenlabs_tts_python/manifest.json b/agents/addon/extension/elevenlabs_tts_python/manifest.json index 7cf5c061..0c9fe25c 100644 --- a/agents/addon/extension/elevenlabs_tts_python/manifest.json +++ b/agents/addon/extension/elevenlabs_tts_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/elevenlabs_tts_python/pcm.py b/agents/addon/extension/elevenlabs_tts_python/pcm.py index 8f73c87b..67a60f21 100644 --- a/agents/addon/extension/elevenlabs_tts_python/pcm.py +++ b/agents/addon/extension/elevenlabs_tts_python/pcm.py @@ -8,7 +8,7 @@ import logging from typing import Iterator -from rte import PcmFrame, RteEnv, RTE_PCM_FRAME_DATA_FMT +from rte import PcmFrame, RteEnv, PcmFrameDataFmt class Pcm: @@ -21,7 +21,7 @@ def get_pcm_frame(self, buf: memoryview) -> PcmFrame: frame.set_sample_rate(self.config.sample_rate) frame.set_number_of_channels(self.config.num_channels) frame.set_timestamp(self.config.timestamp) - frame.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_NON_INTERLEAVE) + frame.set_data_fmt(PcmFrameDataFmt.INTERLEAVE) frame.set_samples_per_channel( self.config.samples_per_channel // self.config.channel ) diff --git a/agents/addon/extension/interrupt_detector/manifest.json b/agents/addon/extension/interrupt_detector/manifest.json index 2a29ba1f..f4c88008 100644 --- a/agents/addon/extension/interrupt_detector/manifest.json +++ b/agents/addon/extension/interrupt_detector/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_go", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py index 407d7816..8e69499c 100644 --- a/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py @@ -16,19 +16,9 @@ @register_addon_as_extension("interrupt_detector_python") class InterruptDetectorExtensionAddon(Addon): - def on_init(self, rte: RteEnv, manifest, property) -> None: - logger.info("on_init") - rte.on_init_done(manifest, property) - return - def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") from .interrupt_detector_extension import InterruptDetectorExtension rte.on_create_instance_done(InterruptDetectorExtension(addon_name), context) - - def on_deinit(self, rte: RteEnv) -> None: - logger.info("on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py index 510b4e1e..315aae52 100644 --- a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py @@ -25,12 +25,6 @@ class InterruptDetectorExtension(Extension): - def on_init( - self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo - ) -> None: - logger.info("on_init") - rte.on_init_done(manifest, property) - def on_start(self, rte: RteEnv) -> None: logger.info("on_start") rte.on_start_done() @@ -39,10 +33,6 @@ def on_stop(self, rte: RteEnv) -> None: logger.info("on_stop") rte.on_stop_done() - def on_deinit(self, rte: RteEnv) -> None: - 
logger.info("on_deinit") - rte.on_deinit_done() - def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("on_cmd") cmd_json = cmd.to_json() diff --git a/agents/addon/extension/interrupt_detector_python/manifest.json b/agents/addon/extension/interrupt_detector_python/manifest.json index 692aea02..c092d77e 100644 --- a/agents/addon/extension/interrupt_detector_python/manifest.json +++ b/agents/addon/extension/interrupt_detector_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/openai_chatgpt/manifest.json b/agents/addon/extension/openai_chatgpt/manifest.json index a2fc4f61..8b1720d0 100644 --- a/agents/addon/extension/openai_chatgpt/manifest.json +++ b/agents/addon/extension/openai_chatgpt/manifest.json @@ -8,7 +8,7 @@ { "type": "system", "name": "rte_runtime_go", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go index a4492f67..34534a91 100644 --- a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go +++ b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go @@ -208,7 +208,7 @@ func (p *openaiChatGPTExtension) OnCmd( cmdName, err := cmd.GetName() if err != nil { slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) - cmdResult, _ := rte.NewCmdResult(rte.Error) + cmdResult, _ := rte.NewCmdResult(rte.StatusCodeError) rteEnv.ReturnResult(cmdResult, cmd) return } @@ -224,20 +224,20 @@ func (p *openaiChatGPTExtension) OnCmd( outCmd, err := rte.NewCmd(cmdOutFlush) if err != nil { slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) - cmdResult, _ := rte.NewCmdResult(rte.Error) + cmdResult, _ := rte.NewCmdResult(rte.StatusCodeError) rteEnv.ReturnResult(cmdResult, cmd) return } if err := rteEnv.SendCmd(outCmd, nil); err != nil { slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) - cmdResult, _ := rte.NewCmdResult(rte.Error) + cmdResult, _ := rte.NewCmdResult(rte.StatusCodeError) rteEnv.ReturnResult(cmdResult, cmd) return } else { slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) } } - cmdResult, _ := rte.NewCmdResult(rte.Ok) + cmdResult, _ := rte.NewCmdResult(rte.StatusCodeOk) rteEnv.ReturnResult(cmdResult, cmd) } diff --git a/agents/addon/extension/openai_chatgpt_python/manifest.json b/agents/addon/extension/openai_chatgpt_python/manifest.json index 9fc1b362..065f9a94 100644 --- a/agents/addon/extension/openai_chatgpt_python/manifest.json +++ b/agents/addon/extension/openai_chatgpt_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py index 537eda5b..d9b3e0b9 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py @@ -15,19 +15,9 @@ @register_addon_as_extension("openai_chatgpt_python") class OpenAIChatGPTExtensionAddon(Addon): - def on_init(self, rte: RteEnv, manifest, property) -> None: - logger.info("OpenAIChatGPTExtensionAddon on_init") - rte.on_init_done(manifest, property) - return - def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: 
logger.info("on_create_instance") from .openai_chatgpt_extension import OpenAIChatGPTExtension rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context) - - def on_deinit(self, rte: RteEnv) -> None: - logger.info("OpenAIChatGPTExtensionAddon on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index 6cabe6cb..60ddf256 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -79,12 +79,6 @@ class OpenAIChatGPTExtension(Extension): outdate_ts = 0 openai_chatgpt = None - def on_init( - self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo - ) -> None: - logger.info("OpenAIChatGPTExtension on_init") - rte.on_init_done(manifest, property) - def on_start(self, rte: RteEnv) -> None: logger.info("OpenAIChatGPTExtension on_start") # Prepare configuration @@ -206,10 +200,6 @@ def on_stop(self, rte: RteEnv) -> None: logger.info("OpenAIChatGPTExtension on_stop") rte.on_stop_done() - def on_deinit(self, rte: RteEnv) -> None: - logger.info("OpenAIChatGPTExtension on_deinit") - rte.on_deinit_done() - def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("OpenAIChatGPTExtension on_cmd") cmd_json = cmd.to_json() diff --git a/agents/addon/extension/polly_tts/README.md b/agents/addon/extension/polly_tts/README.md new file mode 100644 index 00000000..92823a6b --- /dev/null +++ b/agents/addon/extension/polly_tts/README.md @@ -0,0 +1,11 @@ +## Amazon Polly TTS Extension + +### Configurations + +You can config this extension by providing following environments: + +| Env | Required | Default | Notes | +| -- | -- | -- | -- | +| AWS_TTS_REGION | No | us-east-1 | The Region of Amazon Bedrock service you want to use. | +| AWS_TTS_ACCESS_KEY_ID | No | - | Access Key of your IAM User, make sure you've set proper permissions to [synthesize speech](https://docs.aws.amazon.com/polly/latest/dg/security_iam_id-based-policy-examples.html#example-managed-policy-service-admin). Will use default credentials provider if not provided. Check [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | +| AWS_TTS_SECRET_ACCESS_KEY | No | - | Secret Key of your IAM User, make sure you've set proper permissions to [synthesize speech](https://docs.aws.amazon.com/polly/latest/dg/security_iam_id-based-policy-examples.html#example-managed-policy-service-admin). Will use default credentials provider if not provided. Check [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | \ No newline at end of file diff --git a/agents/addon/extension/polly_tts/__init__.py b/agents/addon/extension/polly_tts/__init__.py new file mode 100644 index 00000000..0f60ee72 --- /dev/null +++ b/agents/addon/extension/polly_tts/__init__.py @@ -0,0 +1,3 @@ +from . 
import main + +print("polly_tts_python extension loaded") diff --git a/agents/addon/extension/polly_tts/log.py b/agents/addon/extension/polly_tts/log.py new file mode 100644 index 00000000..dd7d68cf --- /dev/null +++ b/agents/addon/extension/polly_tts/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("polly_tts_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/polly_tts/main.py b/agents/addon/extension/polly_tts/main.py new file mode 100644 index 00000000..585a18a0 --- /dev/null +++ b/agents/addon/extension/polly_tts/main.py @@ -0,0 +1,183 @@ +from rte_runtime_python import ( + Addon, + Extension, + register_addon_as_extension, + Rte, + Cmd, + PcmFrame, + RTE_PCM_FRAME_DATA_FMT, + Data, + StatusCode, + CmdResult, + MetadataInfo, +) + +import queue +import threading +from datetime import datetime +import traceback +from contextlib import closing + +from .log import logger +from .polly_wrapper import PollyWrapper, PollyConfig + +PROPERTY_REGION = "region" # Optional +PROPERTY_ACCESS_KEY = "access_key" # Optional +PROPERTY_SECRET_KEY = "secret_key" # Optional +PROPERTY_ENGINE = 'engine' # Optional +PROPERTY_VOICE = 'voice' # Optional +PROPERTY_SAMPLE_RATE = 'sample_rate'# Optional +PROPERTY_LANG_CODE = 'lang_code' # Optional + + +class PollyTTSExtension(Extension): + def __init__(self, name: str): + super().__init__(name) + + self.outdateTs = datetime.now() + self.stopped = False + self.thread = None + self.queue = queue.Queue() + self.frame_size = None + + self.bytes_per_sample = 2 + self.number_of_channels = 1 + + def on_init( + self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo + ) -> None: + logger.info("PollyTTSExtension on_init") + rte.on_init_done(manifest, property) + + def on_start(self, rte: Rte) -> None: + logger.info("PollyTTSExtension on_start") + + polly_config = PollyConfig.default_config() + + for optional_param in [PROPERTY_REGION, PROPERTY_ENGINE, PROPERTY_VOICE, + PROPERTY_SAMPLE_RATE, PROPERTY_LANG_CODE, + PROPERTY_ACCESS_KEY, PROPERTY_SECRET_KEY ]: + try: + value = rte.get_property_string(optional_param).strip() + if value: + polly_config.__setattr__(optional_param, value) + except Exception as err: + logger.debug(f"GetProperty optional {optional_param} failed, err: {err}. 
Using default value: {polly_config.__getattribute__(optional_param)}") + + self.polly = PollyWrapper(polly_config) + self.frame_size = int(int(polly_config.sample_rate) * self.number_of_channels * self.bytes_per_sample / 100) + + self.thread = threading.Thread(target=self.async_polly_handler, args=[rte]) + self.thread.start() + rte.on_start_done() + + def on_stop(self, rte: Rte) -> None: + logger.info("PollyTTSExtension on_stop") + + self.stopped = True + self.queue.put(None) + self.flush() + self.thread.join() + rte.on_stop_done() + + def on_deinit(self, rte: Rte) -> None: + logger.info("PollyTTSExtension on_deinit") + rte.on_deinit_done() + + def need_interrupt(self, ts: datetime.time) -> bool: + return (self.outdateTs - ts).total_seconds() > 1 + + + def __get_frame(self, data: bytes) -> PcmFrame: + sample_rate = int(self.polly.config.sample_rate) + + f = PcmFrame.create("pcm_frame") + f.set_sample_rate(sample_rate) + f.set_bytes_per_sample(2) + f.set_number_of_channels(1) + + f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_INTERLEAVE) + f.set_samples_per_channel(sample_rate // 100) + f.alloc_buf(self.frame_size) + buff = f.lock_buf() + if len(data) < self.frame_size: + buff[:] = bytes(self.frame_size) #fill with 0 + buff[:len(data)] = data + f.unlock_buf(buff) + return f + + def async_polly_handler(self, rte: Rte): + while not self.stopped: + value = self.queue.get() + if value is None: + logger.warning("async_polly_handler: exit due to None value got.") + break + inputText, ts = value + if len(inputText) == 0: + logger.warning("async_polly_handler: empty input detected.") + continue + try: + audio_stream, visemes = self.polly.synthesize(inputText) + with closing(audio_stream) as stream: + for chunk in stream.iter_chunks(chunk_size=self.frame_size): + if self.need_interrupt(ts): + logger.debug("async_polly_handler: got interrupt cmd, stop sending pcm frame.") + break + + f = self.__get_frame(chunk) + rte.send_pcm_frame(f) + except Exception as e: + logger.exception(e) + logger.exception(traceback.format_exc()) + + def flush(self): + logger.info("PollyTTSExtension flush") + while not self.queue.empty(): + self.queue.get() + self.queue.put(("", datetime.now())) + + def on_data(self, rte: Rte, data: Data) -> None: + logger.info("PollyTTSExtension on_data") + inputText = data.get_property_string("text") + if len(inputText) == 0: + logger.info("ignore empty text") + return + + is_end = data.get_property_bool("end_of_segment") + + logger.info("on data %s %d", inputText, is_end) + self.queue.put((inputText, datetime.now())) + + def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + logger.info("PollyTTSExtension on_cmd") + cmd_json = cmd.to_json() + logger.info("PollyTTSExtension on_cmd json: %s" + cmd_json) + + cmdName = cmd.get_name() + if cmdName == "flush": + self.outdateTs = datetime.now() + self.flush() + cmd_out = Cmd.create("flush") + rte.send_cmd(cmd_out, lambda rte, result: print("PollyTTSExtension send_cmd done")) + else: + logger.info("unknown cmd %s", cmdName) + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + rte.return_result(cmd_result, cmd) + +@register_addon_as_extension("polly_tts") +class PollyTTSExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("PollyTTSExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + 
rte.on_create_instance_done(PollyTTSExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("PollyTTSExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/polly_tts/manifest.json b/agents/addon/extension/polly_tts/manifest.json new file mode 100644 index 00000000..5819f043 --- /dev/null +++ b/agents/addon/extension/polly_tts/manifest.json @@ -0,0 +1,63 @@ +{ + "type": "extension", + "name": "polly_tts", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.4.0" + } + ], + "api": { + "property": { + "region": { + "type": "string" + }, + "access_key": { + "type": "string" + }, + "secret_key": { + "type": "string" + }, + "engine": { + "type": "string" + }, + "voice": { + "type": "string" + }, + "sample_rate": { + "type": "int64" + }, + "lang_code": { + "type": "string" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ], + "pcm_frame_out": [ + { + "name": "pcm_frame" + } + ] + } +} diff --git a/agents/addon/extension/polly_tts/polly_wrapper.py b/agents/addon/extension/polly_tts/polly_wrapper.py new file mode 100644 index 00000000..0f1759bf --- /dev/null +++ b/agents/addon/extension/polly_tts/polly_wrapper.py @@ -0,0 +1,175 @@ +import io +import json +import logging +import boto3 +from typing import Union +from botocore.exceptions import ClientError + +from .log import logger + +# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/polly/client/synthesize_speech.html +class PollyConfig: + def __init__(self, + region: str, + access_key: str, + secret_key: str, + voice: str, + engine: str, # 'standard'|'neural'|'long-form'|'generative' + sample_rate: Union[str, int], + lang_code: None): # only necessary if using a bilingual voice + self.region = region + self.access_key = access_key + self.secret_key = secret_key + + self.voice = voice + self.engine = engine + self.lang_code = lang_code + self.sample_rate = str(sample_rate) + + self.speech_mark_type = 'sentence' # 'sentence'|'ssml'|'viseme'|'word' + self.audio_format = 'pcm' # 'json'|'mp3'|'ogg_vorbis'|'pcm' + self.include_visemes = False + + @classmethod + def default_config(cls): + return cls( + region="us-east-1", + access_key="", + secret_key="", + engine="generative", + voice="Matthew", # https://docs.aws.amazon.com/polly/latest/dg/available-voices.html + sample_rate=16000, + lang_code='en-US' + ) + + +class PollyWrapper: + """Encapsulates Amazon Polly functions.""" + + def __init__(self, config: PollyConfig): + """ + :param config: A PollyConfig + """ + + self.config = config + + if config.access_key and config.secret_key: + logger.info(f"PollyTTS initialized with access key: {config.access_key}") + + self.client = boto3.client(service_name='polly', + region_name=config.region, + aws_access_key_id=config.access_key, + aws_secret_access_key=config.secret_key) + else: + logger.info(f"PollyTTS initialized without access key, using default credentials provider chain.") + self.client = boto3.client(service_name='polly', region_name=config.region) + + self.voice_metadata = None + + + def describe_voices(self): + """ + Gets metadata about available voices. + + :return: The list of voice metadata. 
+ """ + try: + response = self.client.describe_voices() + self.voice_metadata = response["Voices"] + logger.info("Got metadata about %s voices.", len(self.voice_metadata)) + except ClientError: + logger.exception("Couldn't get voice metadata.") + raise + else: + return self.voice_metadata + + + def synthesize(self, text): + """ + Synthesizes speech or speech marks from text, using the specified voice. + + :param text: The text to synthesize. + :return: The audio stream that contains the synthesized speech and a list + of visemes that are associated with the speech audio. + """ + try: + kwargs = { + "Engine": self.config.engine, + "OutputFormat": self.config.audio_format, + "Text": text, + "VoiceId": self.config.voice, + } + if self.config.lang_code is not None: + kwargs["LanguageCode"] = self.config.lang_code + response = self.client.synthesize_speech(**kwargs) + audio_stream = response["AudioStream"] + logger.info("Got audio stream spoken by %s.", self.config.voice) + visemes = None + if self.config.include_visemes: + kwargs["OutputFormat"] = "json" + kwargs["SpeechMarkTypes"] = ["viseme"] + response = self.client.synthesize_speech(**kwargs) + visemes = [ + json.loads(v) + for v in response["AudioStream"].read().decode().split() + if v + ] + logger.info("Got %s visemes.", len(visemes)) + except ClientError: + logger.exception("Couldn't get audio stream.") + raise + else: + return audio_stream, visemes + + def get_voice_engines(self): + """ + Extracts the set of available voice engine types from the full list of + voice metadata. + + :return: The set of voice engine types. + """ + if self.voice_metadata is None: + self.describe_voices() + + engines = set() + for voice in self.voice_metadata: + for engine in voice["SupportedEngines"]: + engines.add(engine) + return engines + + + def get_languages(self, engine): + """ + Extracts the set of available languages for the specified engine from the + full list of voice metadata. + + :param engine: The engine type to filter on. + :return: The set of languages available for the specified engine type. + """ + if self.voice_metadata is None: + self.describe_voices() + + return { + vo["LanguageName"]: vo["LanguageCode"] + for vo in self.voice_metadata + if engine in vo["SupportedEngines"] + } + + + def get_voices(self, engine, language_code): + """ + Extracts the set of voices that are available for the specified engine type + and language from the full list of voice metadata. + + :param engine: The engine type to filter on. + :param language_code: The language to filter on. + :return: The set of voices available for the specified engine type and language. 
+ """ + if self.voice_metadata is None: + self.describe_voices() + + return { + vo["Name"]: vo["Id"] + for vo in self.voice_metadata + if engine in vo["SupportedEngines"] and language_code == vo["LanguageCode"] + } \ No newline at end of file diff --git a/agents/addon/extension/polly_tts/requirements.txt b/agents/addon/extension/polly_tts/requirements.txt new file mode 100644 index 00000000..0a92c2be --- /dev/null +++ b/agents/addon/extension/polly_tts/requirements.txt @@ -0,0 +1 @@ +boto3==1.34.143 \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/manifest.json b/agents/addon/extension/qwen_llm_python/manifest.json index 02b7a98e..097d87b4 100644 --- a/agents/addon/extension/qwen_llm_python/manifest.json +++ b/agents/addon/extension/qwen_llm_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.3.1" + "version": "0.4.0" } ], "api": { diff --git a/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py b/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py index b635f64e..c0a3fa94 100644 --- a/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py +++ b/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py @@ -15,11 +15,6 @@ @register_addon_as_extension("qwen_llm_python") class QWenLLMExtensionAddon(Addon): - def on_init(self, rte: RteEnv, manifest, property) -> None: - logger.info("QWenLLMExtensionAddon on_init") - rte.on_init_done(manifest, property) - return - def on_create_instance(self, rte: RteEnv, addon_name: str, context): logger.info("on_create_instance") @@ -27,7 +22,3 @@ def on_create_instance(self, rte: RteEnv, addon_name: str, context): rte.on_create_instance_done(QWenLLMExtension(addon_name), context) - def on_deinit(self, rte: RteEnv) -> None: - logger.info("QWenLLMExtensionAddon on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py b/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py index 1b34e20b..bcb7612b 100644 --- a/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py +++ b/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py @@ -161,12 +161,6 @@ def call_with_stream( self.on_msg("assistant", total) logger.info("on response %s", total) - def on_init( - self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo - ) -> None: - logger.info("QWenLLMExtension on_init") - rte.on_init_done(manifest, property) - def on_start(self, rte: RteEnv) -> None: logger.info("QWenLLMExtension on_start") self.api_key = rte.get_property_string("api_key") @@ -186,11 +180,7 @@ def on_stop(self, rte: RteEnv) -> None: self.flush() self.thread.join() rte.on_stop_done() - - def on_deinit(self, rte: RteEnv) -> None: - logger.info("QWenLLMExtension on_deinit") - rte.on_deinit_done() - + def flush(self): logger.info("QWenLLMExtension flush") while not self.queue.empty(): diff --git a/agents/main.go b/agents/main.go index cf91296d..97b9ef1d 100644 --- a/agents/main.go +++ b/agents/main.go @@ -15,7 +15,7 @@ import ( ) type appConfig struct { - Manifest string + PropertyFilePath string } type defaultApp struct { @@ -26,15 +26,14 @@ type defaultApp struct { func (p *defaultApp) OnInit( rteEnv rte.RteEnv, - manifest rte.MetadataInfo, property rte.MetadataInfo, ) { - // Using the default manifest.json if not specified. - if len(p.cfg.Manifest) > 0 { - manifest.Set(rte.MetadataTypeJSONFileName, p.cfg.Manifest) + // Using the default property.json if not specified. 
+ if len(p.cfg.PropertyFilePath) > 0 { + property.Set(rte.MetadataTypeJSONFileName, p.cfg.PropertyFilePath) } - rteEnv.OnInitDone(manifest, property) + rteEnv.OnInitDone(property) } func startAppBlocking(cfg *appConfig) { @@ -62,7 +61,7 @@ func main() { cfg := &appConfig{} - flag.StringVar(&cfg.Manifest, "manifest", "", "The absolute path of manifest.json") + flag.StringVar(&cfg.PropertyFilePath, "property", "", "The absolute path of property.json") flag.Parse() startAppBlocking(cfg) diff --git a/agents/manifest.json b/agents/manifest.json new file mode 100644 index 00000000..571aaf12 --- /dev/null +++ b/agents/manifest.json @@ -0,0 +1,33 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.4.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.4.0" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.4.0" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.4.0" + }, + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.4.0" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.4.0" + } + ] +} \ No newline at end of file diff --git a/agents/manifest.json.cn.bedrock.example b/agents/manifest.json.cn.bedrock.example deleted file mode 100644 index 0a8ae550..00000000 --- a/agents/manifest.json.cn.bedrock.example +++ /dev/null @@ -1,210 +0,0 @@ -{ - "type": "app", - "name": "astra_agents", - "version": "0.2.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.3.1" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "py_init_extension_cpp", - "version": "0.3.1" - }, - { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "agora_rtc", - "version": "0.3.0-rc1" - } - ], - "predefined_graphs": [ - { - "name": "astra_agents", - "auto_start": true, - "nodes": [ - { - "type": "extension", - "extension_group": "default", - "addon": "agora_rtc", - "name": "agora_rtc", - "property": { - "app_id": "", - "token": "", - "channel": "astra_agents_test", - "stream_id": 1234, - "remote_stream_id": 123, - "subscribe_audio": true, - "publish_audio": true, - "publish_data": true, - "enable_agora_asr": true, - "agora_asr_vendor_name": "microsoft", - "agora_asr_language": "en-US", - "agora_asr_vendor_key": "", - "agora_asr_vendor_region": "", - "agora_asr_session_control_file_path": "session_control.conf" - } - }, - { - "type": "extension", - "extension_group": "bedrock", - "addon": "bedrock_llm_python", - "name": "bedrock_llm", - "property": { - "region": "us-east-1", - "access_key": "", - "secret_key": "", - "model": "anthropic.claude-3-5-sonnet-20240620-v1:0", - "max_tokens": 512, - "prompt": "", - "greeting": "ASTRA agent connected. 
How can i help you today?", - "max_memory_length": 10 - } - }, - { - "type": "extension", - "extension_group": "tts", - "addon": "azure_tts", - "name": "azure_tts", - "property": { - "azure_subscription_key": "", - "azure_subscription_region": "", - "azure_synthesis_voice_name": "en-US-JaneNeural" - } - }, - { - "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector_python", - "name": "interrupt_detector" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "default" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "bedrock" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "tts" - } - ], - "connections": [ - { - "extension_group": "default", - "extension": "agora_rtc", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "default", - "extension": "interrupt_detector" - }, - { - "extension_group": "bedrock", - "extension": "bedrock_llm" - } - ] - } - ], - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "bedrock", - "extension": "bedrock_llm" - } - ] - } - ] - }, - { - "extension_group": "bedrock", - "extension": "bedrock_llm", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "tts", - "extension": "azure_tts" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "tts", - "extension": "azure_tts" - } - ] - } - ] - }, - { - "extension_group": "tts", - "extension": "azure_tts", - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "bedrock", - "extension": "bedrock_llm" - } - ] - } - ] - } - ] - } - ] -} diff --git a/agents/manifest.json.cn.openai.example b/agents/manifest.json.cn.openai.example deleted file mode 100644 index 3f0625d6..00000000 --- a/agents/manifest.json.cn.openai.example +++ /dev/null @@ -1,212 +0,0 @@ -{ - "type": "app", - "name": "astra_agents", - "version": "0.2.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.3.1" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "py_init_extension_cpp", - "version": "0.3.1" - }, - { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "agora_rtc", - "version": "0.3.0-rc1" - } - ], - "predefined_graphs": [ - { - "name": "astra_agents", - "auto_start": true, - "nodes": [ - { - "type": "extension", - "extension_group": "default", - "addon": "agora_rtc", - "name": "agora_rtc", - "property": { - "app_id": "", - "token": "", - "channel": "astra_agents_test", - "stream_id": 1234, - "remote_stream_id": 123, - "subscribe_audio": true, - "publish_audio": true, - "publish_data": true, - "enable_agora_asr": true, - "agora_asr_vendor_name": "microsoft", - "agora_asr_language": "en-US", - "agora_asr_vendor_key": "", - "agora_asr_vendor_region": "", - "agora_asr_session_control_file_path": "session_control.conf" - } - }, - { - "type": "extension", - "extension_group": "chatgpt", - "addon": "openai_chatgpt_python", - "name": "openai_chatgpt", - 
"property": { - "base_url": "", - "api_key": "", - "frequency_penalty": 0.9, - "model": "gpt-3.5-turbo", - "max_tokens": 512, - "prompt": "", - "proxy_url": "", - "greeting": "ASTRA agent connected. How can i help you today?", - "max_memory_length": 10 - } - }, - { - "type": "extension", - "extension_group": "tts", - "addon": "cosy_tts", - "name": "cosy_tts", - "property": { - "api_key": "", - "model": "cosyvoice-v1", - "voice": "longxiaochun", - "sample_rate": 16000 - } - }, - { - "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector_python", - "name": "interrupt_detector" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "default" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "chatgpt" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "tts" - } - ], - "connections": [ - { - "extension_group": "default", - "extension": "agora_rtc", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "default", - "extension": "interrupt_detector" - }, - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - } - ] - } - ], - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - } - ] - } - ] - }, - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "tts", - "extension": "cosy_tts" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "tts", - "extension": "cosy_tts" - } - ] - } - ] - }, - { - "extension_group": "tts", - "extension": "cosy_tts", - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - } - ] - } - ] - } - ] - } - ] -} diff --git a/agents/manifest.json.elevenlabs.example b/agents/manifest.json.elevenlabs.example deleted file mode 100644 index e4e45ae6..00000000 --- a/agents/manifest.json.elevenlabs.example +++ /dev/null @@ -1,260 +0,0 @@ -{ - "type": "app", - "name": "astra_agents", - "version": "0.1.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.3.1" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "py_init_extension_cpp", - "version": "0.3.1" - }, - { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "agora_rtc", - "version": "0.3.0-rc1" - } - ], - "predefined_graphs": [ - { - "name": "astra_agents", - "auto_start": true, - "nodes": [ - { - "type": "extension", - "extension_group": "default", - "addon": "agora_rtc", - "name": "agora_rtc", - "property": { - "app_id": "", - "token": "", - "channel": "astra_agents_test", - "stream_id": 1234, - "remote_stream_id": 123, - "subscribe_audio": true, - "publish_audio": true, - "publish_data": true, - "enable_agora_asr": true, - "agora_asr_vendor_name": "microsoft", - "agora_asr_language": "en-US", - "agora_asr_vendor_key": "", - "agora_asr_vendor_region": "", - 
"agora_asr_session_control_file_path": "session_control.conf" - } - }, - { - "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector", - "name": "interrupt_detector" - }, - { - "type": "extension", - "extension_group": "chatgpt", - "addon": "openai_chatgpt", - "name": "openai_chatgpt", - "property": { - "base_url": "", - "api_key": "", - "frequency_penalty": 0.9, - "model": "gpt-3.5-turbo", - "max_tokens": 512, - "prompt": "", - "proxy_url": "", - "greeting": "ASTRA agent connected. How can i help you today?", - "max_memory_length": 10 - } - }, - { - "type": "extension", - "extension_group": "tts", - "addon": "elevenlabs_tts", - "name": "elevenlabs_tts", - "property": { - "api_key": "", - "model_id": "eleven_multilingual_v2", - "optimize_streaming_latency": 0, - "request_timeout_seconds": 30, - "similarity_boost": 0.75, - "speaker_boost": false, - "stability": 0.5, - "style": 0.0, - "voice_id": "pNInz6obpgDQGcFmaJgB" - } - }, - { - "type": "extension", - "extension_group": "transcriber", - "addon": "chat_transcriber", - "name": "chat_transcriber" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "default" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "chatgpt" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "tts" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "transcriber" - } - ], - "connections": [ - { - "extension_group": "default", - "extension": "agora_rtc", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "default", - "extension": "interrupt_detector" - }, - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber" - } - ] - } - ] - }, - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "tts", - "extension": "elevenlabs_tts" - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber", - "cmd_conversions": [ - { - "cmd": { - "type": "per_property", - "keep_original": true, - "rules": [ - { - "path": "is_final", - "type": "fixed_value", - "value": "bool(true)" - }, - { - "path": "stream_id", - "type": "fixed_value", - "value": "uint32(999)" - } - ] - } - } - ] - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "tts", - "extension": "elevenlabs_tts" - } - ] - } - ] - }, - { - "extension_group": "tts", - "extension": "elevenlabs_tts", - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber", - "data": [ - { - "name": "data", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - } - ] - } - ] - } - ] - } - ] -} diff --git a/agents/manifest.json.en.bedrock.example b/agents/manifest.json.en.bedrock.example deleted file mode 100644 index 0a8ae550..00000000 --- a/agents/manifest.json.en.bedrock.example +++ /dev/null @@ -1,210 +0,0 @@ -{ - "type": "app", 
- "name": "astra_agents", - "version": "0.2.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.3.1" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "py_init_extension_cpp", - "version": "0.3.1" - }, - { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "agora_rtc", - "version": "0.3.0-rc1" - } - ], - "predefined_graphs": [ - { - "name": "astra_agents", - "auto_start": true, - "nodes": [ - { - "type": "extension", - "extension_group": "default", - "addon": "agora_rtc", - "name": "agora_rtc", - "property": { - "app_id": "", - "token": "", - "channel": "astra_agents_test", - "stream_id": 1234, - "remote_stream_id": 123, - "subscribe_audio": true, - "publish_audio": true, - "publish_data": true, - "enable_agora_asr": true, - "agora_asr_vendor_name": "microsoft", - "agora_asr_language": "en-US", - "agora_asr_vendor_key": "", - "agora_asr_vendor_region": "", - "agora_asr_session_control_file_path": "session_control.conf" - } - }, - { - "type": "extension", - "extension_group": "bedrock", - "addon": "bedrock_llm_python", - "name": "bedrock_llm", - "property": { - "region": "us-east-1", - "access_key": "", - "secret_key": "", - "model": "anthropic.claude-3-5-sonnet-20240620-v1:0", - "max_tokens": 512, - "prompt": "", - "greeting": "ASTRA agent connected. How can i help you today?", - "max_memory_length": 10 - } - }, - { - "type": "extension", - "extension_group": "tts", - "addon": "azure_tts", - "name": "azure_tts", - "property": { - "azure_subscription_key": "", - "azure_subscription_region": "", - "azure_synthesis_voice_name": "en-US-JaneNeural" - } - }, - { - "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector_python", - "name": "interrupt_detector" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "default" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "bedrock" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "tts" - } - ], - "connections": [ - { - "extension_group": "default", - "extension": "agora_rtc", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "default", - "extension": "interrupt_detector" - }, - { - "extension_group": "bedrock", - "extension": "bedrock_llm" - } - ] - } - ], - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "bedrock", - "extension": "bedrock_llm" - } - ] - } - ] - }, - { - "extension_group": "bedrock", - "extension": "bedrock_llm", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "tts", - "extension": "azure_tts" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "tts", - "extension": "azure_tts" - } - ] - } - ] - }, - { - "extension_group": "tts", - "extension": "azure_tts", - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "bedrock", - "extension": "bedrock_llm" - } - ] - } - ] - } - ] - } - ] -} diff --git a/agents/manifest.json.example 
b/agents/manifest.json.example deleted file mode 100644 index 8bbd42ba..00000000 --- a/agents/manifest.json.example +++ /dev/null @@ -1,265 +0,0 @@ -{ - "type": "app", - "name": "astra_agents", - "version": "0.2.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.3.1" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "py_init_extension_cpp", - "version": "0.3.1" - }, - { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "agora_rtc", - "version": "0.3.0-rc1" - } - ], - "predefined_graphs": [ - { - "name": "astra_agents", - "auto_start": true, - "nodes": [ - { - "type": "extension", - "extension_group": "default", - "addon": "agora_rtc", - "name": "agora_rtc", - "property": { - "app_id": "", - "token": "", - "channel": "astra_agents_test", - "stream_id": 1234, - "remote_stream_id": 123, - "subscribe_audio": true, - "publish_audio": true, - "publish_data": true, - "enable_agora_asr": true, - "agora_asr_vendor_name": "microsoft", - "agora_asr_language": "en-US", - "agora_asr_vendor_key": "", - "agora_asr_vendor_region": "", - "agora_asr_session_control_file_path": "session_control.conf" - } - }, - { - "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector", - "name": "interrupt_detector" - }, - { - "type": "extension", - "extension_group": "chatgpt", - "addon": "openai_chatgpt", - "name": "openai_chatgpt", - "property": { - "base_url": "", - "api_key": "", - "frequency_penalty": 0.9, - "model": "gpt-3.5-turbo", - "max_tokens": 512, - "prompt": "", - "proxy_url": "", - "greeting": "ASTRA agent connected. How can i help you today?", - "max_memory_length": 10 - } - }, - { - "type": "extension", - "extension_group": "tts", - "addon": "azure_tts", - "name": "azure_tts", - "property": { - "azure_subscription_key": "", - "azure_subscription_region": "", - "azure_synthesis_voice_name": "en-US-JaneNeural" - } - }, - { - "type": "extension", - "extension_group": "transcriber", - "addon": "chat_transcriber", - "name": "chat_transcriber" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "default" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "chatgpt" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "tts" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "transcriber" - } - ], - "connections": [ - { - "extension_group": "default", - "extension": "agora_rtc", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "default", - "extension": "interrupt_detector" - }, - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber" - } - ] - } - ], - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "default", - "extension": "interrupt_detector" - } - ] - } - ] - }, - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "tts", - "extension": "azure_tts" - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber", - "cmd_conversions": [ - { - "cmd": { - "type": "per_property", - "keep_original": true, - "rules": [ - { - "path": "is_final", - "type": "fixed_value", - "value": "bool(true)" - 
}, - { - "path": "stream_id", - "type": "fixed_value", - "value": "uint32(999)" - } - ] - } - } - ] - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "tts", - "extension": "azure_tts" - } - ] - } - ] - }, - { - "extension_group": "tts", - "extension": "azure_tts", - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber", - "data": [ - { - "name": "data", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - } - ] - } - ] - } - ] - } - ] -} diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.qwen.example deleted file mode 100644 index f7481e23..00000000 --- a/agents/manifest.json.qwen.example +++ /dev/null @@ -1,269 +0,0 @@ -{ - "type": "app", - "name": "astra_agents", - "version": "0.2.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.3.1" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "py_init_extension_cpp", - "version": "0.3.1" - }, - { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.3.1" - }, - { - "type": "extension", - "name": "agora_rtc", - "version": "0.3.0-rc1" - } - ], - "predefined_graphs": [ - { - "name": "astra_agents", - "auto_start": true, - "nodes": [ - { - "type": "extension", - "extension_group": "default", - "addon": "agora_rtc", - "name": "agora_rtc", - "property": { - "app_id": "", - "token": "", - "channel": "astra_agents_test", - "stream_id": 1234, - "remote_stream_id": 123, - "subscribe_audio": true, - "publish_audio": true, - "publish_data": true, - "enable_agora_asr": true, - "agora_asr_vendor_name": "microsoft", - "agora_asr_language": "en-US", - "agora_asr_vendor_key": "", - "agora_asr_vendor_region": "", - "agora_asr_session_control_file_path": "session_control.conf" - } - }, - { - "type": "extension", - "extension_group": "llm", - "addon": "qwen_llm_python", - "name": "qwen_llm", - "property": { - "api_key": "", - "model": "qwen-max", - "max_tokens": 512, - "prompt": "", - "max_memory_length": 10 - } - }, - { - "type": "extension", - "extension_group": "tts", - "addon": "cosy_tts", - "name": "cosy_tts", - "property": { - "api_key": "", - "model": "cosyvoice-v1", - "voice": "longxiaochun", - "sample_rate": 16000 - } - }, - { - "type": "extension", - "extension_group": "chat_transcriber", - "addon": "chat_transcriber_python", - "name": "chat_transcriber" - }, - { - "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector_python", - "name": "interrupt_detector" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "default" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "llm" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "tts" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "chat_transcriber" - } - ], - "connections": [ - { - "extension_group": "default", - 
"extension": "agora_rtc", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "default", - "extension": "interrupt_detector" - }, - { - "extension_group": "chat_transcriber", - "extension": "chat_transcriber" - } - ] - } - ], - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "llm", - "extension": "qwen_llm" - } - ] - } - ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "llm", - "extension": "qwen_llm" - } - ] - } - ], - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "llm", - "extension": "qwen_llm" - } - ] - } - ] - }, - { - "extension_group": "llm", - "extension": "qwen_llm", - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "tts", - "extension": "cosy_tts" - }, - { - "extension_group": "chat_transcriber", - "extension": "chat_transcriber", - "cmd_conversions": [ - { - "cmd": { - "type": "per_property", - "keep_original": true, - "rules": [ - { - "path": "is_final", - "type": "fixed_value", - "value": "bool(true)" - }, - { - "path": "stream_id", - "type": "fixed_value", - "value": "uint32(999)" - } - ] - } - } - ] - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "tts", - "extension": "cosy_tts" - } - ] - } - ] - }, - { - "extension_group": "tts", - "extension": "cosy_tts", - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ], - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "chat_transcriber", - "extension": "chat_transcriber", - "data": [ - { - "name": "data", - "dest":[ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - } - ] - } - ] -} diff --git a/agents/property.json b/agents/property.json deleted file mode 100644 index 64810e6a..00000000 --- a/agents/property.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "rte": { - "log_level": 3 - } -} \ No newline at end of file diff --git a/agents/property.json.example b/agents/property.json.example new file mode 100644 index 00000000..e3af5b44 --- /dev/null +++ b/agents/property.json.example @@ -0,0 +1,1171 @@ +{ + "rte": { + "predefined_graphs": [ + { + "name": "va.openai.azure", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector", + "name": "interrupt_detector" + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "", + "frequency_penalty": 0.9, + "model": "gpt-3.5-turbo", + "max_tokens": 512, + "prompt": "", + "proxy_url": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "", + "azure_subscription_region": "", + "azure_synthesis_voice_name": "en-US-JaneNeural" + } + }, + { + "type": "extension", + "extension_group": "transcriber", + "addon": "chat_transcriber", + "name": "chat_transcriber" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "transcriber" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + }, + { + "name": "va.openai.11labs", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector", + "name": "interrupt_detector" + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "", + 
"frequency_penalty": 0.9, + "model": "gpt-3.5-turbo", + "max_tokens": 512, + "prompt": "", + "proxy_url": "", + "greeting": "ASTRA agent connected. How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "elevenlabs_tts", + "name": "elevenlabs_tts", + "property": { + "api_key": "", + "model_id": "eleven_multilingual_v2", + "optimize_streaming_latency": 0, + "request_timeout_seconds": 30, + "similarity_boost": 0.75, + "speaker_boost": false, + "stability": 0.5, + "style": 0.0, + "voice_id": "pNInz6obpgDQGcFmaJgB" + } + }, + { + "type": "extension", + "extension_group": "transcriber", + "addon": "chat_transcriber", + "name": "chat_transcriber" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "transcriber" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "elevenlabs_tts" + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "elevenlabs_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "elevenlabs_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + }, + { + "name": "va.bedrock.azure", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + 
}, + { + "type": "extension", + "extension_group": "bedrock", + "addon": "bedrock_llm_python", + "name": "bedrock_llm", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "model": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "max_tokens": 512, + "prompt": "", + "greeting": "ASTRA agent connected. How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "", + "azure_subscription_region": "", + "azure_synthesis_voice_name": "en-US-JaneNeural" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "bedrock" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + } + ] + }, + { + "name": "va.openai.cosy", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt_python", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "", + "frequency_penalty": 0.9, + "model": "gpt-3.5-turbo", + "max_tokens": 512, + "prompt": "", + "proxy_url": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cosy_tts", + "name": "cosy_tts", + "property": { + "api_key": "", + "model": "cosyvoice-v1", + "voice": "longxiaochun", + "sample_rate": 16000 + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cosy_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + }, + { + "name": "va.qwen.cosy", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "llm", + "addon": "qwen_llm_python", + "name": "qwen_llm", + "property": { + "api_key": "", + "model": "qwen-max", + "max_tokens": 512, + "prompt": "", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cosy_tts", + "name": "cosy_tts", + "property": { + "api_key": "", + "model": "cosyvoice-v1", + "voice": "longxiaochun", + "sample_rate": 16000 + } + }, + { + "type": "extension", + "extension_group": "chat_transcriber", + "addon": "chat_transcriber_python", + "name": "chat_transcriber" + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "llm" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": 
"default_extension_group", + "name": "chat_transcriber" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ], + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "llm", + "extension": "qwen_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cosy_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest":[ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + } + ] + }, + { + "name": "va.bedrock.polly", + "auto_start": false, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "bedrock", + "addon": "bedrock_llm_python", + "name": "bedrock_llm", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "model": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "max_tokens": 512, + "prompt": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "polly_tts", + "name": "polly_tts", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "engine": "generative", + "voice": "Ruth", + "sample_rate": "16000", + "lang_code": "en-US" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "bedrock" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "polly_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "polly_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "polly_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + } + ] + } + ] + } +} diff --git a/agents/scripts/BUILD.gn b/agents/scripts/BUILD.gn index 73628981..c9fa5f1f 100644 --- a/agents/scripts/BUILD.gn +++ b/agents/scripts/BUILD.gn @@ -31,7 +31,6 @@ rte_package("default_app_cpp") { package_kind = "app" manifest = "manifest.json" - property = "property.json" if (package_type == "develop") { # It's 'develop' package, therefore, need to build the result. diff --git a/agents/scripts/package.sh b/agents/scripts/package.sh index 4c3ba1d0..eedb734d 100755 --- a/agents/scripts/package.sh +++ b/agents/scripts/package.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash -APP_HOME=$(cd $(dirname $0)/..; pwd) +APP_HOME=$( + cd $(dirname $0)/.. 
+ pwd +) cd $APP_HOME @@ -8,49 +11,46 @@ rm -rf .release mkdir .release copy_extension() { - local extension=$1 - mkdir -p .release/addon/extension/$extension + local extension=$1 + mkdir -p .release/addon/extension/$extension - if [[ -d addon/extension/$extension/lib ]]; then - cp -r addon/extension/$extension/lib .release/addon/extension/$extension/ - fi + if [[ -d addon/extension/$extension/lib ]]; then + cp -r addon/extension/$extension/lib .release/addon/extension/$extension/ + fi + + if [[ -f addon/extension/$extension/manifest.json ]]; then + cp addon/extension/$extension/manifest.json .release/addon/extension/$extension/ - if [[ -f addon/extension/$extension/manifest.json ]]; then - cp addon/extension/$extension/manifest.json .release/addon/extension/$extension/ + # package .py for python extensions + EXTENSION_LANGUAGE=$(jq -r '.language' addon/extension/$extension/manifest.json) + if [[ $EXTENSION_LANGUAGE == "python" ]]; then + # TODO: package 'publish' contents only + cp addon/extension/$extension/*.py .release/addon/extension/$extension/ + if [[ -f addon/extension/$extension/requirements.txt ]]; then + cp addon/extension/$extension/requirements.txt .release/addon/extension/$extension/ + fi - # package .py for python extensions - EXTENSION_LANGUAGE=$(jq -r '.language' addon/extension/$extension/manifest.json) - if [[ $EXTENSION_LANGUAGE == "python" ]]; then - # TODO: package 'publish' contents only - cp addon/extension/$extension/*.py .release/addon/extension/$extension/ - if [[ -f addon/extension/$extension/requirements.txt ]]; then - cp addon/extension/$extension/requirements.txt .release/addon/extension/$extension/ - fi - - # TODO: copy specific contents - if [[ -d addon/extension/$extension/pb ]]; then - cp -r addon/extension/$extension/pb .release/addon/extension/$extension/ - fi + # TODO: copy specific contents + if [[ -d addon/extension/$extension/pb ]]; then + cp -r addon/extension/$extension/pb .release/addon/extension/$extension/ + fi + fi fi - fi - if [[ -f addon/extension/$extension/property.json ]]; then - cp addon/extension/$extension/property.json .release/addon/extension/$extension/ - fi + if [[ -f addon/extension/$extension/property.json ]]; then + cp addon/extension/$extension/property.json .release/addon/extension/$extension/ + fi } cp -r bin .release cp -r lib .release cp manifest.json .release -#cp manifest.elevenlabs.json .release -cp manifest.cn.json .release -cp manifest.en.json .release cp property.json .release # python deps if [[ -d interface/rte ]]; then - mkdir -p .release/interface - cp -r interface/rte .release/interface + mkdir -p .release/interface + cp -r interface/rte .release/interface fi # extension group @@ -59,12 +59,11 @@ cp -r addon/extension_group .release/addon/ # extensions mkdir -p .release/addon/extension -for extension in addon/extension/* -do - extension_name=$(basename $extension) - copy_extension $extension_name +for extension in addon/extension/*; do + extension_name=$(basename $extension) + copy_extension $extension_name done if [[ -f session_control.conf ]]; then - cp -r session_control.conf .release/ + cp -r session_control.conf .release/ fi diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..6772611f --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,46 @@ +services: + astra_agents_dev: + image: ghcr.io/rte-design/astra_agents_build:0.3.5 + container_name: astra_agents_dev + platform: linux/amd64 + tty: true + stdin_open: true + restart: always + command: "make run-gd-server" + ports: + 
- "${GRAPH_DESIGNER_SERVER_PORT}:${GRAPH_DESIGNER_SERVER_PORT}" + - "${SERVER_PORT}:${SERVER_PORT}" + volumes: + - ./:/app + - ${LOG_PATH}:${LOG_PATH} + working_dir: /app + environment: + AGORA_APP_ID: ${AGORA_APP_ID} + AGORA_APP_CERTIFICATE: ${AGORA_APP_CERTIFICATE} + AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID} + AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY} + AWS_BEDROCK_MODEL: ${AWS_BEDROCK_MODEL} + AWS_REGION: ${AWS_REGION} + AZURE_STT_KEY: ${AZURE_STT_KEY} + AZURE_STT_REGION: ${AZURE_STT_REGION} + AZURE_TTS_KEY: ${AZURE_TTS_KEY} + AZURE_TTS_REGION: ${AZURE_TTS_REGION} + COSY_TTS_KEY: ${COSY_TTS_KEY} + ELEVENLABS_TTS_KEY: ${ELEVENLABS_TTS_KEY} + OPENAI_API_KEY: ${OPENAI_API_KEY} + OPENAI_BASE_URL: ${OPENAI_BASE_URL} + OPENAI_MODEL: ${OPENAI_MODEL} + OPENAI_PROXY_URL: ${OPENAI_PROXY_URL} + QWEN_API_KEY: ${QWEN_API_KEY} + astra_playground: + image: agoraio/astra_playground:latest + container_name: astra_playground + restart: always + ports: + - "3000:3000" + astra_graph_designer: + image: agoraio/astra_graph_designer:latest + container_name: astra_graph_designer + restart: always + ports: + - "3001:3000" diff --git a/docker-compose.yml.example b/docker-compose.yml.example deleted file mode 100644 index d548cffb..00000000 --- a/docker-compose.yml.example +++ /dev/null @@ -1,34 +0,0 @@ -version: "3.8" - -services: - astra_agents_server: - image: agoraio/astra_agents_server:latest - container_name: astra_agents_server - restart: always - ports: - - "8080:8080" - volumes: - - /tmp:/tmp - environment: - # Agora App ID and Agora App Certificate - AGORA_APP_ID: - AGORA_APP_CERTIFICATE: - # Azure STT key and region - AZURE_STT_KEY: - AZURE_STT_REGION: - # OpenAI API key - OPENAI_API_KEY: - # TTS vendor: azure/elevenlabs - TTS_VENDOR_CHINESE: azure - TTS_VENDOR_ENGLISH: azure - # If you choose azure, you need to provide the following Azure STT key and region - AZURE_TTS_KEY: - AZURE_TTS_REGION: - # If you choose elevenlabs, you need to provide the following Elevenlabs TTS key - ELEVENLABS_TTS_KEY: - astra_playground: - image: agoraio/astra_playground:latest - container_name: astra_playground - restart: always - ports: - - "3000:3000" diff --git a/server/go.mod b/server/go.mod index 088a6b4b..c2324acc 100644 --- a/server/go.mod +++ b/server/go.mod @@ -7,6 +7,7 @@ require ( github.com/gin-gonic/gin v1.9.1 github.com/gogf/gf v1.16.9 github.com/google/uuid v1.6.0 + github.com/joho/godotenv v1.5.1 github.com/tidwall/gjson v1.17.1 github.com/tidwall/sjson v1.2.5 ) diff --git a/server/go.sum b/server/go.sum index 55cfe725..5be9e17a 100644 --- a/server/go.sum +++ b/server/go.sum @@ -59,6 +59,8 @@ github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWm github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grokify/html-strip-tags-go v0.0.1 h1:0fThFwLbW7P/kOiTBs03FsJSV9RM2M/Q/MOnCQxKMo0= github.com/grokify/html-strip-tags-go v0.0.1/go.mod h1:2Su6romC5/1VXOQMaWL2yb618ARB8iVo6/DR99A6d78= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= diff --git a/server/internal/code.go b/server/internal/code.go index e3f56e42..dfb6f6ca 100644 --- a/server/internal/code.go 
+++ b/server/internal/code.go @@ -16,7 +16,7 @@ var ( codeErrChannelEmpty = NewCode("10004", "channel empty") codeErrGenerateTokenFailed = NewCode("10005", "generate token failed") - codeErrProcessManifestFailed = NewCode("10100", "process manifest json failed") + codeErrProcessPropertyFailed = NewCode("10100", "process property json failed") codeErrStartWorkerFailed = NewCode("10101", "start worker failed") codeErrStopWorkerFailed = NewCode("10102", "stop worker failed") ) diff --git a/server/internal/config.go b/server/internal/config.go new file mode 100644 index 00000000..3c034fe4 --- /dev/null +++ b/server/internal/config.go @@ -0,0 +1,142 @@ +package internal + +import "log/slog" + +type Prop struct { + ExtensionName string + Property string +} + +const ( + // Extension name + extensionNameAgoraRTC = "agora_rtc" + extensionNameBedrockLLM = "bedrock_llm" + extensionNameAzureTTS = "azure_tts" + extensionNameCosyTTS = "cosy_tts" + extensionNameElevenlabsTTS = "elevenlabs_tts" + extensionNameOpenaiChatgpt = "openai_chatgpt" + extensionNamePollyTTS = "polly_tts" + extensionNameQwenLLM = "qwen_llm" + + // Language + languageChinese = "zh-CN" + languageEnglish = "en-US" + // Default graph name + graphNameDefault = "va.openai.azure" + // Property json + PropertyJsonFile = "./agents/property.json" + // Token expire time + tokenExpirationInSeconds = uint32(86400) + // Voice type + voiceTypeMale = "male" + voiceTypeFemale = "female" +) + +var ( + logTag = slog.String("service", "HTTP_SERVER") + + // Retrieve configuration information from environment variables and map it to the property.json file + EnvPropMap = map[string][]Prop{ + "AGORA_APP_ID": { + {ExtensionName: extensionNameAgoraRTC, Property: "app_id"}, + }, + "AWS_ACCESS_KEY_ID": { + {ExtensionName: extensionNameBedrockLLM, Property: "access_key"}, + {ExtensionName: extensionNamePollyTTS, Property: "access_key"}, + }, + "AWS_SECRET_ACCESS_KEY": { + {ExtensionName: extensionNameBedrockLLM, Property: "secret_key"}, + {ExtensionName: extensionNamePollyTTS, Property: "secret_key"}, + }, + "AWS_BEDROCK_MODEL": { + {ExtensionName: extensionNameBedrockLLM, Property: "model"}, + }, + "AWS_REGION": { + {ExtensionName: extensionNameBedrockLLM, Property: "region"}, + }, + "AZURE_STT_KEY": { + {ExtensionName: extensionNameAgoraRTC, Property: "agora_asr_vendor_key"}, + }, + "AZURE_STT_REGION": { + {ExtensionName: extensionNameAgoraRTC, Property: "agora_asr_vendor_region"}, + }, + "AZURE_TTS_KEY": { + {ExtensionName: extensionNameAzureTTS, Property: "azure_subscription_key"}, + }, + "AZURE_TTS_REGION": { + {ExtensionName: extensionNameAzureTTS, Property: "azure_subscription_region"}}, + "COSY_TTS_KEY": { + {ExtensionName: extensionNameCosyTTS, Property: "api_key"}, + }, + "ELEVENLABS_TTS_KEY": { + {ExtensionName: extensionNameElevenlabsTTS, Property: "api_key"}, + }, + "OPENAI_API_KEY": { + {ExtensionName: extensionNameOpenaiChatgpt, Property: "api_key"}, + }, + "OPENAI_BASE_URL": { + {ExtensionName: extensionNameOpenaiChatgpt, Property: "base_url"}, + }, + "OPENAI_MODEL": { + {ExtensionName: extensionNameOpenaiChatgpt, Property: "model"}, + }, + "OPENAI_PROXY_URL": { + {ExtensionName: extensionNameOpenaiChatgpt, Property: "proxy_url"}, + }, + "QWEN_API_KEY": { + {ExtensionName: extensionNameQwenLLM, Property: "api_key"}, + }, + } + + // Retrieve parameters from the request and map them to the property.json file + startPropMap = map[string][]Prop{ + "AgoraAsrLanguage": { + {ExtensionName: extensionNameAgoraRTC, Property: "agora_asr_language"}, + }, + 
"ChannelName": { + {ExtensionName: extensionNameAgoraRTC, Property: "channel"}, + }, + "RemoteStreamId": { + {ExtensionName: extensionNameAgoraRTC, Property: "remote_stream_id"}, + }, + "Token": { + {ExtensionName: extensionNameAgoraRTC, Property: "token"}, + }, + "VoiceType": { + {ExtensionName: extensionNameAzureTTS, Property: "azure_synthesis_voice_name"}, + {ExtensionName: extensionNameElevenlabsTTS, Property: "voice_id"}, + }, + } + + // Map the voice name to the voice type + voiceNameMap = map[string]map[string]map[string]string{ + languageChinese: { + extensionNameAzureTTS: { + voiceTypeMale: "zh-CN-YunxiNeural", + voiceTypeFemale: "zh-CN-XiaoxiaoNeural", + }, + extensionNameElevenlabsTTS: { + voiceTypeMale: "pNInz6obpgDQGcFmaJgB", // Adam + voiceTypeFemale: "Xb7hH8MSUJpSbSDYk0k2", // Alice + }, + extensionNamePollyTTS: { + voiceTypeMale: "Zhiyu", + voiceTypeFemale: "Zhiyu", + }, + }, + languageEnglish: { + extensionNameAzureTTS: { + voiceTypeMale: "en-US-BrianNeural", + voiceTypeFemale: "en-US-JaneNeural", + }, + extensionNameElevenlabsTTS: { + voiceTypeMale: "pNInz6obpgDQGcFmaJgB", // Adam + voiceTypeFemale: "Xb7hH8MSUJpSbSDYk0k2", // Alice + }, + extensionNamePollyTTS: { + voiceTypeMale: "Matthew", + voiceTypeFemale: "Ruth", + }, + }, + } +) diff --git a/server/internal/http_server.go b/server/internal/http_server.go index 3158edd3..13461156 100644 --- a/server/internal/http_server.go +++ b/server/internal/http_server.go @@ -5,9 +5,6 @@ * Copyright (c) 2024 Agora IO. All rights reserved. * */ -// Note that this is just an example extension written in the GO programming -// language, so the package name does not equal to the containing directory -// name. However, it is not common in Go. package internal import ( @@ -22,7 +19,6 @@ import ( "github.com/gin-gonic/gin" "github.com/gin-gonic/gin/binding" "github.com/gogf/gf/crypto/gmd5" - "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -33,7 +29,8 @@ type HttpServer struct { type HttpServerConfig struct { AppId string AppCertificate string - ManifestJsonFile string + LogPath string + PropertyJsonFile string Port string TTSVendorChinese string TTSVendorEnglish string @@ -42,98 +39,37 @@ type HttpServerConfig struct { } type PingReq struct { - RequestId string `form:"request_id,omitempty" json:"request_id,omitempty"` - ChannelName string `form:"channel_name,omitempty" json:"channel_name,omitempty"` + RequestId string `json:"request_id,omitempty"` + ChannelName string `json:"channel_name,omitempty"` } type StartReq struct { - RequestId string `form:"request_id,omitempty" json:"request_id,omitempty"` - AgoraAsrLanguage string `form:"agora_asr_language,omitempty" json:"agora_asr_language,omitempty"` - ChannelName string `form:"channel_name,omitempty" json:"channel_name,omitempty"` - RemoteStreamId uint32 `form:"remote_stream_id,omitempty" json:"remote_stream_id,omitempty"` - VoiceType string `form:"voice_type,omitempty" json:"voice_type,omitempty"` + RequestId string `json:"request_id,omitempty"` + AgoraAsrLanguage string `json:"agora_asr_language,omitempty"` + ChannelName string `json:"channel_name,omitempty"` + GraphName string `json:"graph_name,omitempty"` + RemoteStreamId uint32 `json:"remote_stream_id,omitempty"` + Token string `json:"token,omitempty"` + VoiceType string `json:"voice_type,omitempty"` } type StopReq struct { - RequestId string `form:"request_id,omitempty" json:"request_id,omitempty"` - ChannelName string `form:"channel_name,omitempty" json:"channel_name,omitempty"` + RequestId string 
`json:"request_id,omitempty"`
+    ChannelName string `json:"channel_name,omitempty"`
 }
 
 type GenerateTokenReq struct {
-    RequestId string `form:"request_id,omitempty" json:"request_id,omitempty"`
-    ChannelName string `form:"channel_name,omitempty" json:"channel_name,omitempty"`
-    Uid uint32 `form:"uid,omitempty" json:"uid,omitempty"`
+    RequestId string `json:"request_id,omitempty"`
+    ChannelName string `json:"channel_name,omitempty"`
+    Uid uint32 `json:"uid,omitempty"`
 }
 
-const (
-    privilegeExpirationInSeconds = uint32(86400)
-    tokenExpirationInSeconds = uint32(86400)
-
-    languageChinese = "zh-CN"
-    languageEnglish = "en-US"
-
-    ManifestJsonFile = "./agents/manifest.json"
-    ManifestJsonFileElevenlabs = "./agents/manifest.elevenlabs.json"
-
-    TTSVendorAzure = "azure"
-    TTSVendorElevenlabs = "elevenlabs"
-
-    voiceTypeMale = "male"
-    voiceTypeFemale = "female"
-)
-
-var (
-    voiceNameMap = map[string]map[string]map[string]string{
-        languageChinese: {
-            TTSVendorAzure: {
-                voiceTypeMale: "zh-CN-YunxiNeural",
-                voiceTypeFemale: "zh-CN-XiaoxiaoNeural",
-            },
-            TTSVendorElevenlabs: {
-                voiceTypeMale: "pNInz6obpgDQGcFmaJgB", // Adam
-                voiceTypeFemale: "Xb7hH8MSUJpSbSDYk0k2", // Alice
-            },
-        },
-        languageEnglish: {
-            TTSVendorAzure: {
-                voiceTypeMale: "en-US-BrianNeural",
-                voiceTypeFemale: "en-US-JaneNeural",
-            },
-            TTSVendorElevenlabs: {
-                voiceTypeMale: "pNInz6obpgDQGcFmaJgB", // Adam
-                voiceTypeFemale: "Xb7hH8MSUJpSbSDYk0k2", // Alice
-            },
-        },
-    }
-
-    logTag = slog.String("service", "HTTP_SERVER")
-)
-
 func NewHttpServer(httpServerConfig *HttpServerConfig) *HttpServer {
     return &HttpServer{
         config: httpServerConfig,
     }
 }
 
-func (s *HttpServer) getManifestJsonFile(language string) (manifestJsonFile string) {
-    ttsVendor := s.getTtsVendor(language)
-    manifestJsonFile = ManifestJsonFile
-
-    if ttsVendor == TTSVendorElevenlabs {
-        manifestJsonFile = ManifestJsonFileElevenlabs
-    }
-
-    return
-}
-
-func (s *HttpServer) getTtsVendor(language string) string {
-    if language == languageChinese {
-        return s.config.TTSVendorChinese
-    }
-
-    return s.config.TTSVendorEnglish
-}
-
 func (s *HttpServer) handlerHealth(c *gin.Context) {
     slog.Debug("handlerHealth", logTag)
     s.output(c, codeOk, nil)
@@ -200,14 +136,14 @@ func (s *HttpServer) handlerStart(c *gin.Context) {
         return
     }
 
-    manifestJsonFile, logFile, err := s.processManifest(&req)
+    propertyJsonFile, logFile, err := s.processProperty(&req)
     if err != nil {
-        slog.Error("handlerStart process manifest", "channelName", req.ChannelName, "requestId", req.RequestId, logTag)
-        s.output(c, codeErrProcessManifestFailed, http.StatusInternalServerError)
+        slog.Error("handlerStart process property", "channelName", req.ChannelName, "requestId", req.RequestId, logTag)
+        s.output(c, codeErrProcessPropertyFailed, http.StatusInternalServerError)
         return
     }
 
-    worker := newWorker(req.ChannelName, logFile, manifestJsonFile)
+    worker := newWorker(req.ChannelName, logFile, propertyJsonFile)
     worker.QuitTimeoutSeconds = s.config.WorkerQuitTimeoutSeconds
     if err := worker.start(&req); err != nil {
         slog.Error("handlerStart start worker failed", "err", err, "requestId", req.RequestId, logTag)
@@ -276,7 +212,7 @@ func (s *HttpServer) handlerGenerateToken(c *gin.Context) {
         return
     }
 
-    token, err := rtctokenbuilder.BuildTokenWithUid(s.config.AppId, s.config.AppCertificate, req.ChannelName, req.Uid, rtctokenbuilder.RolePublisher, tokenExpirationInSeconds, privilegeExpirationInSeconds)
+    token, err := rtctokenbuilder.BuildTokenWithUid(s.config.AppId, s.config.AppCertificate, req.ChannelName, req.Uid, rtctokenbuilder.RolePublisher, tokenExpirationInSeconds, tokenExpirationInSeconds)
     if err != nil {
         slog.Error("handlerGenerateToken generate token failed", "err", err, "requestId", req.RequestId, logTag)
         s.output(c, codeErrGenerateTokenFailed, http.StatusBadRequest)
@@ -295,59 +231,52 @@ func (s *HttpServer) output(c *gin.Context, code *Code, data any, httpStatus ...
     c.JSON(httpStatus[0], gin.H{"code": code.code, "msg": code.msg, "data": data})
 }
 
-func (s *HttpServer) processManifest(req *StartReq) (manifestJsonFile string, logFile string, err error) {
-    manifestJsonFile = s.getManifestJsonFile(req.AgoraAsrLanguage)
-    content, err := os.ReadFile(manifestJsonFile)
+func (s *HttpServer) processProperty(req *StartReq) (propertyJsonFile string, logFile string, err error) {
+    content, err := os.ReadFile(PropertyJsonFile)
     if err != nil {
-        slog.Error("handlerStart read manifest.json failed", "err", err, "manifestJsonFile", manifestJsonFile, "requestId", req.RequestId, logTag)
+        slog.Error("handlerStart read property.json failed", "err", err, "propertyJsonFile", propertyJsonFile, "requestId", req.RequestId, logTag)
         return
     }
 
-    manifestJson := string(content)
+    propertyJson := string(content)
 
-    if s.config.AppId != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.app_id`, s.config.AppId)
+    // Get graph name
+    graphName := req.GraphName
+    if graphName == "" {
+        graphName = graphNameDefault
     }
 
-    appId := gjson.Get(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.app_id`).String()
-
     // Generate token
-    token := appId
+    req.Token = s.config.AppId
     if s.config.AppCertificate != "" {
-        token, err = rtctokenbuilder.BuildTokenWithUid(appId, s.config.AppCertificate, req.ChannelName, 0, rtctokenbuilder.RoleSubscriber, tokenExpirationInSeconds, privilegeExpirationInSeconds)
+        req.Token, err = rtctokenbuilder.BuildTokenWithUid(s.config.AppId, s.config.AppCertificate, req.ChannelName, 0, rtctokenbuilder.RoleSubscriber, tokenExpirationInSeconds, tokenExpirationInSeconds)
         if err != nil {
             slog.Error("handlerStart generate token failed", "err", err, "requestId", req.RequestId, logTag)
             return
         }
     }
-    manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.token`, token)
 
-    if req.AgoraAsrLanguage != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.agora_asr_language`, req.AgoraAsrLanguage)
-    }
-    if req.ChannelName != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.channel`, req.ChannelName)
-    }
-    if req.RemoteStreamId != 0 {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.remote_stream_id`, req.RemoteStreamId)
-    }
-
-    language := gjson.Get(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.agora_asr_language`).String()
-
-    ttsVendor := s.getTtsVendor(language)
-    voiceName := voiceNameMap[language][ttsVendor][req.VoiceType]
-    if voiceName != "" {
-        if ttsVendor == TTSVendorAzure {
-            manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="azure_tts").property.azure_synthesis_voice_name`, voiceName)
-        } else if ttsVendor == TTSVendorElevenlabs {
-            manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="elevenlabs_tts").property.voice_id`, voiceName)
+    graph := fmt.Sprintf(`rte.predefined_graphs.#(name=="%s")`, graphName)
+    // Automatically start on launch
+    propertyJson, _ = sjson.Set(propertyJson, fmt.Sprintf(`%s.auto_start`, graph), true)
+
+    // Set parameters from the request to property.json
+    for key, props := range startPropMap {
+        if val := getFieldValue(req, key); val != "" {
+            for _, prop := range props {
+                if key == "VoiceType" {
+                    val = voiceNameMap[req.AgoraAsrLanguage][prop.ExtensionName][req.VoiceType]
+                }
+                propertyJson, _ = sjson.Set(propertyJson, fmt.Sprintf(`%s.nodes.#(name=="%s").property.%s`, graph, prop.ExtensionName, prop.Property), val)
+            }
         }
     }
 
     channelNameMd5 := gmd5.MustEncryptString(req.ChannelName)
     ts := time.Now().UnixNano()
-    manifestJsonFile = fmt.Sprintf("/tmp/manifest-%s-%d.json", channelNameMd5, ts)
-    logFile = fmt.Sprintf("/tmp/app-%s-%d.log", channelNameMd5, ts)
-    os.WriteFile(manifestJsonFile, []byte(manifestJson), 0644)
+    propertyJsonFile = fmt.Sprintf("%s/property-%s-%d.json", s.config.LogPath, channelNameMd5, ts)
+    logFile = fmt.Sprintf("%s/app-%s-%d.log", s.config.LogPath, channelNameMd5, ts)
+    os.WriteFile(propertyJsonFile, []byte(propertyJson), 0644)
 
     return
 }
@@ -366,5 +295,5 @@ func (s *HttpServer) Start() {
     slog.Info("server start", "port", s.config.Port, logTag)
 
     go cleanWorker()
-    r.Run(s.config.Port)
+    r.Run(fmt.Sprintf(":%s", s.config.Port))
 }
diff --git a/server/internal/utils.go b/server/internal/utils.go
new file mode 100644
index 00000000..0342c36d
--- /dev/null
+++ b/server/internal/utils.go
@@ -0,0 +1,39 @@
+package internal
+
+import (
+    "reflect"
+)
+
+func getFieldValue(req any, fieldName string) any {
+    v := reflect.ValueOf(req)
+    if v.Kind() == reflect.Ptr {
+        v = v.Elem()
+    }
+
+    field := v.FieldByName(fieldName)
+
+    if field.IsValid() {
+        switch field.Kind() {
+        case reflect.Bool:
+            return field.Bool()
+        case reflect.Float32:
+            return float32(field.Float())
+        case reflect.Float64:
+            return field.Float()
+        case reflect.Int:
+            return field.Int()
+        case reflect.Int32:
+            return int(field.Int())
+        case reflect.Int64:
+            return field.Int()
+        case reflect.Uint32:
+            return field.Uint()
+        case reflect.Uint64:
+            return field.Uint()
+        case reflect.String:
+            return field.String()
+        }
+    }
+
+    return nil
+}
diff --git a/server/internal/worker.go b/server/internal/worker.go
index 788649f1..c250c296 100644
--- a/server/internal/worker.go
+++ b/server/internal/worker.go
@@ -16,7 +16,7 @@ import (
 type Worker struct {
     ChannelName string
     LogFile string
-    ManifestJsonFile string
+    PropertyJsonFile string
     Pid int
     QuitTimeoutSeconds int
     CreateTs int64
@@ -32,11 +32,11 @@ var (
     workers = gmap.New(true)
 )
 
-func newWorker(channelName string, logFile string, manifestJsonFile string) *Worker {
+func newWorker(channelName string, logFile string, propertyJsonFile string) *Worker {
     return &Worker{
         ChannelName: channelName,
         LogFile: logFile,
-        ManifestJsonFile: manifestJsonFile,
+        PropertyJsonFile: propertyJsonFile,
        QuitTimeoutSeconds: 60,
         CreateTs: time.Now().Unix(),
         UpdateTs: time.Now().Unix(),
@@ -44,14 +44,14 @@ func newWorker(channelName string, logFile string, manifestJsonFile string) *Wor
 }
 
 func (w *Worker) start(req *StartReq) (err error) {
-    shell := fmt.Sprintf("cd /app/agents && nohup %s --manifest %s > %s 2>&1 &", workerExec, w.ManifestJsonFile, w.LogFile)
+    shell := fmt.Sprintf("cd /app/agents && nohup %s --property %s > %s 2>&1 &", workerExec, w.PropertyJsonFile, w.LogFile)
     slog.Info("Worker start", "requestId", req.RequestId, "shell", shell, logTag)
     if _, err = exec.Command("sh", "-c", shell).CombinedOutput(); err != nil {
         slog.Error("Worker start failed", "err", err, "requestId", req.RequestId, logTag)
         return
     }
 
-    shell = fmt.Sprintf("ps aux | grep %s | grep -v grep | awk '{print $2}'", w.ManifestJsonFile)
+    shell = fmt.Sprintf("ps aux | grep %s | grep -v grep | awk '{print $2}'", w.PropertyJsonFile)
     slog.Info("Worker get pid", "requestId", req.RequestId, "shell", shell, logTag)
     output, err := exec.Command("sh", "-c", shell).CombinedOutput()
     if err != nil {
diff --git a/server/main.go b/server/main.go
index 2c3274d9..442e6506 100644
--- a/server/main.go
+++ b/server/main.go
@@ -1,146 +1,86 @@
 package main
 
 import (
-    "flag"
+    "fmt"
     "log/slog"
     "os"
     "strconv"
 
+    "github.com/joho/godotenv"
+    "github.com/tidwall/gjson"
     "github.com/tidwall/sjson"
 
     "app/internal"
 )
 
 func main() {
-    httpServerConfig := &internal.HttpServerConfig{}
-
-    ttsVendorChinese := os.Getenv("TTS_VENDOR_CHINESE")
-    if len(ttsVendorChinese) == 0 {
-        ttsVendorChinese = internal.TTSVendorAzure
+    // Load .env
+    err := godotenv.Load()
+    if err != nil {
+        slog.Warn("load .env file failed", "err", err)
     }
 
-    ttsVendorEnglish := os.Getenv("TTS_VENDOR_ENGLISH")
-    if len(ttsVendorEnglish) == 0 {
-        ttsVendorEnglish = internal.TTSVendorAzure
+    // Check environment
+    agoraAppId := os.Getenv("AGORA_APP_ID")
+    if len(agoraAppId) != 32 {
+        slog.Error("environment AGORA_APP_ID invalid")
+        os.Exit(1)
     }
 
     workersMax, err := strconv.Atoi(os.Getenv("WORKERS_MAX"))
     if err != nil || workersMax <= 0 {
-        workersMax = 2
+        slog.Error("environment WORKERS_MAX invalid")
+        os.Exit(1)
     }
 
     workerQuitTimeoutSeconds, err := strconv.Atoi(os.Getenv("WORKER_QUIT_TIMEOUT_SECONDES"))
     if err != nil || workerQuitTimeoutSeconds <= 0 {
-        workerQuitTimeoutSeconds = 60
+        slog.Error("environment WORKER_QUIT_TIMEOUT_SECONDES invalid")
+        os.Exit(1)
     }
 
-    flag.StringVar(&httpServerConfig.AppId, "appId", os.Getenv("AGORA_APP_ID"), "agora appid")
-    flag.StringVar(&httpServerConfig.AppCertificate, "appCertificate", os.Getenv("AGORA_APP_CERTIFICATE"), "agora certificate")
-    flag.StringVar(&httpServerConfig.Port, "port", ":8080", "http server port")
-    flag.StringVar(&httpServerConfig.TTSVendorChinese, "ttsVendorChinese", ttsVendorChinese, "tts vendor for chinese")
-    flag.StringVar(&httpServerConfig.TTSVendorEnglish, "ttsVendorEnglish", ttsVendorEnglish, "tts vendor for english")
-    flag.IntVar(&httpServerConfig.WorkersMax, "workersMax", workersMax, "workers max")
-    flag.IntVar(&httpServerConfig.WorkerQuitTimeoutSeconds, "workerQuitTimeoutSeconds", workerQuitTimeoutSeconds, "worker quit timeout seconds")
-    flag.Parse()
-
-    slog.Info("server config", "ttsVendorChinese", httpServerConfig.TTSVendorChinese, "ttsVendorEnglish", httpServerConfig.TTSVendorEnglish,
-        "workersMax", httpServerConfig.WorkersMax, "workerQuitTimeoutSeconds", httpServerConfig.WorkerQuitTimeoutSeconds)
+    // Process property.json
+    if err = processProperty(internal.PropertyJsonFile); err != nil {
+        slog.Error("process property.json failed", "err", err)
+        os.Exit(1)
+    }
 
-    processManifest(internal.ManifestJsonFile)
-    processManifest(internal.ManifestJsonFileElevenlabs)
+    // Start server
+    httpServerConfig := &internal.HttpServerConfig{
+        AppId: agoraAppId,
+        AppCertificate: os.Getenv("AGORA_APP_CERTIFICATE"),
+        LogPath: os.Getenv("LOG_PATH"),
+        Port: os.Getenv("SERVER_PORT"),
+        WorkersMax: workersMax,
+        WorkerQuitTimeoutSeconds: workerQuitTimeoutSeconds,
+    }
     httpServer := internal.NewHttpServer(httpServerConfig)
     httpServer.Start()
 }
 
-func processManifest(manifestJsonFile string) (err error) {
-    content, err := os.ReadFile(manifestJsonFile)
+func processProperty(propertyJsonFile string) (err error) {
+    content, err := os.ReadFile(propertyJsonFile)
     if err != nil {
-        slog.Error("read manifest.json failed", "err", err, "manifestJsonFile", manifestJsonFile)
+        slog.Error("read property.json failed", "err", err, "propertyJsonFile", propertyJsonFile)
         return
     }
 
-    manifestJson := string(content)
-
-    appId := os.Getenv("AGORA_APP_ID")
-    if appId != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.app_id`, appId)
-    }
-
-    azureSttKey := os.Getenv("AZURE_STT_KEY")
-    if azureSttKey != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.agora_asr_vendor_key`, azureSttKey)
-    }
-
-    azureSttRegion := os.Getenv("AZURE_STT_REGION")
-    if azureSttRegion != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="agora_rtc").property.agora_asr_vendor_region`, azureSttRegion)
-    }
-
-    openaiBaseUrl := os.Getenv("OPENAI_BASE_URL")
-    if openaiBaseUrl != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="openai_chatgpt").property.base_url`, openaiBaseUrl)
-    }
-
-    openaiApiKey := os.Getenv("OPENAI_API_KEY")
-    if openaiApiKey != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="openai_chatgpt").property.api_key`, openaiApiKey)
-    }
-
-    openaiModel := os.Getenv("OPENAI_MODEL")
-    if openaiModel != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="openai_chatgpt").property.model`, openaiModel)
-    }
-
-    proxyUrl := os.Getenv("PROXY_URL")
-    if proxyUrl != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="openai_chatgpt").property.proxy_url`, proxyUrl)
-    }
-
-    azureTtsKey := os.Getenv("AZURE_TTS_KEY")
-    if azureTtsKey != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="azure_tts").property.azure_subscription_key`, azureTtsKey)
-    }
-
-    azureTtsRegion := os.Getenv("AZURE_TTS_REGION")
-    if azureTtsRegion != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="azure_tts").property.azure_subscription_region`, azureTtsRegion)
-    }
-
-    awsRegion := os.Getenv("AWS_REGION")
-    if awsRegion != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.region`, awsRegion)
-    }
-
-    awsAccessKey := os.Getenv("AWS_ACCESS_KEY_ID")
-    if awsAccessKey != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.access_key`, awsAccessKey)
-    }
-
-    awsSecretKey := os.Getenv("AWS_SECRET_ACCESS_KEY")
-    if awsSecretKey != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.secret_key`, awsSecretKey)
-    }
-
-    bedrockModel := os.Getenv("AWS_BEDROCK_MODEL")
-    if bedrockModel != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.model`, bedrockModel)
-    }
-
-    elevenlabsTtsKey := os.Getenv("ELEVENLABS_TTS_KEY")
-    if elevenlabsTtsKey != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="elevenlabs_tts").property.api_key`, elevenlabsTtsKey)
-    }
-
-    cosyTtsKey := os.Getenv("COSY_TTS_KEY")
-    if cosyTtsKey != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="cosy_tts").property.api_key`, cosyTtsKey)
+    propertyJson := string(content)
+    for i := range gjson.Get(propertyJson, "rte.predefined_graphs").Array() {
+        graph := fmt.Sprintf("rte.predefined_graphs.%d", i)
+        // Disable auto-start for every predefined graph
+        propertyJson, _ = sjson.Set(propertyJson, fmt.Sprintf(`%s.auto_start`, graph), false)
 
-    qwenApiKey := os.Getenv("QWEN_API_KEY")
-    if qwenApiKey != "" {
-        manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="qwen_llm").property.api_key`, qwenApiKey)
+        // Write environment variable values into property.json
+        for envKey, envProps := range internal.EnvPropMap {
+            if envVal := os.Getenv(envKey); envVal != "" {
+                for _, envProp := range envProps {
+                    propertyJson, _ = sjson.Set(propertyJson, fmt.Sprintf(`%s.nodes.#(name=="%s").property.%s`, graph, envProp.ExtensionName, envProp.Property), envVal)
+                }
+            }
+        }
     }
 
-    err = os.WriteFile(manifestJsonFile, []byte(manifestJson), 0644)
+    err = os.WriteFile(propertyJsonFile, []byte(propertyJson), 0644)
    return
 }
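Editor's note: both processProperty implementations in this patch are driven by lookup tables, startPropMap and internal.EnvPropMap, which are defined elsewhere in the change and do not appear in the hunks above. As a reading aid only, here is a minimal, hypothetical sketch of the shape those loops assume; the struct name and the map entries below are illustrative (derived from the properties the removed processManifest code used to set), not the PR's actual table.

```go
package internal

// Prop names one property on one extension node inside a predefined graph.
// Its two fields are exactly what the sjson paths above interpolate:
//   <graph>.nodes.#(name=="<ExtensionName>").property.<Property>
// "Prop" is an assumed name for this sketch; the real definition is not shown in this diff.
type Prop struct {
    ExtensionName string
    Property      string
}

var (
    // EnvPropMap maps an environment variable to the graph properties it overrides;
    // processProperty in main.go applies it to every predefined graph.
    // Example entries only; the real table lives outside this diff.
    EnvPropMap = map[string][]Prop{
        "AZURE_STT_KEY":  {{ExtensionName: "agora_rtc", Property: "agora_asr_vendor_key"}},
        "OPENAI_API_KEY": {{ExtensionName: "openai_chatgpt", Property: "api_key"}},
        "AZURE_TTS_KEY":  {{ExtensionName: "azure_tts", Property: "azure_subscription_key"}},
    }

    // startPropMap maps a StartReq field name (resolved via getFieldValue) to the
    // graph properties set on the graph selected by a /start request.
    // Example entries only.
    startPropMap = map[string][]Prop{
        "ChannelName": {{ExtensionName: "agora_rtc", Property: "channel"}},
        "Token":       {{ExtensionName: "agora_rtc", Property: "token"}},
        "VoiceType":   {{ExtensionName: "azure_tts", Property: "azure_synthesis_voice_name"}},
    }
)
```

With tables of this shape, supporting a new extension or environment variable becomes a map entry rather than another hand-written sjson.Set block, which is what the per-variable code removed from main.go amounted to.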