diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 25c72292..0482866c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,7 +2,7 @@ // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile { "name": "astra", - "image": "agoraio/astra_agents_build", + "image": "ghcr.io/rte-design/astra_agents_build", "customizations": { "vscode": { diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index dffb6874..d4d2710f 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -2,7 +2,7 @@ name: Build Docker on: push: - branches: [ "main" ] + branches: [ "main", "python-experimental-develop" ] # Publish semver tags as releases. tags: [ 'v*.*.*' ] paths-ignore: @@ -36,6 +36,6 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} registry: ghcr.io - tags: "latest,${{ steps.pre-step.outputs.image-tag }}" + tags: "${{ github.ref == 'refs/heads/main' && 'latest,' || '' }}${{ steps.pre-step.outputs.image-tag }}" no_push: ${{ github.event_name == 'pull_request' }} diff --git a/.gitignore b/.gitignore index e5f56f6c..33708b7b 100644 --- a/.gitignore +++ b/.gitignore @@ -20,8 +20,11 @@ crash_context_v1 include/ interface/ lib/ +lib64 agents/manifest.json agents/manifest.elevenlabs.json +agents/manifest.cn.json +agents/manifest.en.json !agents/addon/manifest.json node_modules/ /out/ @@ -32,3 +35,5 @@ xdump_config .vscode/ speechsdk/ SpeechSDK-Linux.tar.gz +pyvenv.cfg +xdump_config \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..b0181771 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "python.analysis.extraPaths": [ + "./agents/interface", + ], + "editor.formatOnSave": true, + } \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 70b32294..605bce58 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM agoraio/astra_agents_build:latest AS builder +FROM ghcr.io/rte-design/astra_agents_build:0.3.3 AS builder ARG SESSION_CONTROL_CONF=session_control.conf @@ -6,7 +6,9 @@ WORKDIR /app COPY . . 
COPY agents/manifest.json.example agents/manifest.json -COPY agents/manifest.json.elevenlabs.example agents/manifest.elevenlabs.json +# COPY agents/manifest.json.elevenlabs.example agents/manifest.elevenlabs.json +COPY agents/manifest.json.cn.example agents/manifest.cn.json +COPY agents/manifest.json.en.example agents/manifest.en.json COPY agents/${SESSION_CONTROL_CONF} agents/session_control.conf RUN make build && \ @@ -20,6 +22,11 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend libunwind-dev \ libc++1 \ libssl-dev \ + python3 \ + python3-venv \ + python3-pip \ + python3-dev \ + jq \ ca-certificates \ && apt-get clean && rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* @@ -27,6 +34,8 @@ WORKDIR /app COPY --from=builder /app/agents/.release/ agents/ COPY --from=builder /app/server/bin/api /app/server/bin/api +COPY --from=builder /usr/local/lib /usr/local/lib +COPY --from=builder /usr/lib/python3 /usr/lib/python3 EXPOSE 8080 diff --git a/Makefile b/Makefile index e8fb73ec..7e3e4514 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,13 @@ build-server: cd server && go mod tidy && go mod download && go build -o bin/api main.go @echo ">> done" +clean: clean-agents + +clean-agents: + @echo ">> clean agents" + rm -rf agents/manifest.json agents/bin agents/out agents/interface agents/include agents/lib agents/lib64 agents/addon/system agents/addon/extension_group agents/.release + @echo ">> done" + docker-build-server: @echo ">> docker build server" docker build -t $(REGISTRY)$(PROJECT_NAME):$(PROJECT_VERSION) --platform linux/amd64 -f Dockerfile . diff --git a/README.md b/README.md index 2eb18979..cb55c754 100644 --- a/README.md +++ b/README.md @@ -137,9 +137,14 @@ You need to prepare the proper `manifest.json` file first. ```bash # Rename manifest example cp ./agents/manifest.json.example ./agents/manifest.json +cp ./agents/manifest.json.en.example ./agents/manifest.en.json +cp ./agents/manifest.json.cn.example ./agents/manifest.cn.json -# Pull the docker image with dev tools and mount your current folder as workspace -docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev agoraio/astra_agents_build +# pull the docker image with dev tools and mount your current folder as workspace +docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build + +# for windows git bash +# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build # Enter docker image docker exec -it astra_agents_dev bash @@ -155,19 +160,22 @@ The above code generates an agent executable. To customize your prompts and Open Once you have made the necessary changes, you can use the following commands to start a server. You can then test it out using the ASTRA voice agent from the showcase. ```bash - +# TODO: need to refactor the contents # Agora App ID and Agora App Certificate export AGORA_APP_ID= export AGORA_APP_CERTIFICATE= # OpenAI API key export OPENAI_API_KEY= +# Or QWEN key +export QWEN_API_KEY= # Azure STT key and region export AZURE_STT_KEY= export AZURE_STT_REGION= -# Here are two TTS options, either one will work +# TTS +# Here are three TTS options, either one will work # Make sure to comment out the one you don't use # 1. using Azure @@ -179,6 +187,9 @@ export AZURE_TTS_REGION= export TTS_VENDOR_ENGLISH=elevenlabs export ELEVENLABS_TTS_KEY= +# 3. 
using Cosy +export COSY_TTS_KEY= + # agent is ready to start on port 8080 make run-server @@ -186,6 +197,38 @@ make run-server 🎉 Congratulations! You have created your first personalized voice agent. +
+
+## Quick Agent Customize Test
+
+The default agent control is managed via the server gateway. For quick testing, you can also run the agent directly.
+
+```bash
+# Rename manifest example
+cp ./agents/manifest.json.example ./agents/manifest.json
+cp ./agents/manifest.json.en.example ./agents/manifest.en.json
+cp ./agents/manifest.json.cn.example ./agents/manifest.cn.json
+
+# Pull the docker image with dev tools and mount your current folder as workspace
+docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build
+
+# For Windows Git Bash
+# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build
+
+# Enter docker image
+docker exec -it astra_agents_dev bash
+
+make build
+
+cd ./agents
+# Edit manifest.json to replace the placeholders with your keys
+./bin/start
+```
+
+Use [https://webdemo.agora.io/](https://webdemo.agora.io/) to test quickly.
+
+Note that the `channel` and `remote_stream_id` need to match the ones you use on `https://webdemo.agora.io/`.
+
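+As a quick sanity check before starting, you can pull those two values out of the manifest. This is only a sketch: it assumes fields literally named `channel` and `remote_stream_id` appear somewhere inside `manifest.json` (jq is installed in the image by this change), and the recursive descent avoids hard-coding where they are nested:
+
+```bash
+jq '.. | objects | select(has("channel")) | {channel, remote_stream_id}' ./agents/manifest.json
+```
+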
+

## ASTRA Service

## Discover More

Now that you’ve created your first AI agent, the creativity doesn’t stop here. To develop more amazing agents, you’ll need an advanced understanding of how the ASTRA works under the hood. Please refer to the [ ASTRA architecture documentation ](./docs/astra-architecture.md). diff --git a/agents/.gitignore b/agents/.gitignore index 5ab2252d..46de7887 100644 --- a/agents/.gitignore +++ b/agents/.gitignore @@ -1,7 +1,9 @@ *.log addon/extension_group/ addon/extension/agora_rtc +addon/extension/py_init_extension_cpp addon/system +.rte agoradns.dat agorareport.dat bin/ @@ -23,3 +25,4 @@ lib/ session_control.conf.agora xdump_config .vscode +*.pyc diff --git a/agents/addon/extension/azure_tts/manifest.json b/agents/addon/extension/azure_tts/manifest.json index c1f70cf5..5064033b 100644 --- a/agents/addon/extension/azure_tts/manifest.json +++ b/agents/addon/extension/azure_tts/manifest.json @@ -1,13 +1,13 @@ { "type": "extension", "name": "azure_tts", - "version": "0.1.0", + "version": "0.2.0", "language": "cpp", "dependencies": [ { "type": "system", "name": "rte_runtime", - "version": "0.1.0" + "version": "0.3.1" }, { "type": "system", diff --git a/agents/addon/extension/azure_tts/src/main.cc b/agents/addon/extension/azure_tts/src/main.cc index 61162f8f..272fc47d 100644 --- a/agents/addon/extension/azure_tts/src/main.cc +++ b/agents/addon/extension/azure_tts/src/main.cc @@ -13,9 +13,6 @@ #include "log.h" #include "macro/check.h" -#include "rte_runtime/binding/cpp/internal/msg/cmd/cmd.h" -#include "rte_runtime/binding/cpp/internal/msg/pcm_frame.h" -#include "rte_runtime/binding/cpp/internal/rte_proxy.h" #include "rte_runtime/binding/cpp/rte.h" #include "tts.h" @@ -31,7 +28,7 @@ class azure_tts_extension_t : public rte::extension_t { // - azure_subscription_key // - azure_subscription_region // - azure_synthesis_voice_name - void on_start(rte::rte_t &rte) override { + void on_start(rte::rte_env_t &rte) override { AZURE_TTS_LOGI("start"); // read properties @@ -45,7 +42,7 @@ class azure_tts_extension_t : public rte::extension_t { return; } - rte_proxy_ = std::unique_ptr(rte::rte_proxy_t::create(rte)); + rte_proxy_ = std::unique_ptr(rte::rte_env_proxy_t::create(rte)); RTE_ASSERT(rte_proxy_ != nullptr, "rte_proxy should not be nullptr"); // pcm parameters @@ -69,17 +66,17 @@ class azure_tts_extension_t : public rte::extension_t { pcm_frame->set_data_fmt(RTE_PCM_FRAME_DATA_FMT_INTERLEAVE); pcm_frame->set_samples_per_channel(samples_per_10ms); pcm_frame->alloc_buf(pcm_frame_size); - rte::buf_t borrowed_buf = pcm_frame->borrow_buf(0); + rte::buf_t borrowed_buf = pcm_frame->lock_buf(0); auto *buf = borrowed_buf.data(); if (buf != nullptr) { memset(buf, 0, pcm_frame_size); // fill empty if size is not enough for 10ms memcpy(buf, data, size); } - pcm_frame->give_back_buf(borrowed_buf); + pcm_frame->unlock_buf(borrowed_buf); auto pcm_frame_shared = std::make_shared>(std::move(pcm_frame)); rte_proxy->notify( - [frame = std::move(pcm_frame_shared)](rte::rte_t &rte) { rte.send_pcm_frame(std::move(*frame)); }); + [frame = std::move(pcm_frame_shared)](rte::rte_env_t &rte) { rte.send_pcm_frame(std::move(*frame)); }); }; @@ -104,9 +101,9 @@ class azure_tts_extension_t : public rte::extension_t { // - name: flush // example: // {"name": "flush"} - void on_cmd(rte::rte_t &rte, std::unique_ptr cmd) override { + void on_cmd(rte::rte_env_t &rte, std::unique_ptr cmd) override { - std::string command = cmd->get_msg_name(); + std::string command = cmd->get_name(); AZURE_TTS_LOGI("%s", command.c_str()); if (command == 
kCmdNameFlush) { @@ -115,14 +112,14 @@ class azure_tts_extension_t : public rte::extension_t { // passthrough cmd auto ret = rte.send_cmd(rte::cmd_t::create(kCmdNameFlush.c_str())); - if (ret != RTE_STATUS_CODE_OK) { - AZURE_TTS_LOGE("Failed to send cmd %s, ret:%d", kCmdNameFlush.c_str(), int(ret)); - rte.return_string(RTE_STATUS_CODE_ERROR, "Failed to send cmd", std::move(cmd)); + if (!ret) { + AZURE_TTS_LOGE("Failed to send cmd %s", kCmdNameFlush.c_str()); + rte.return_result(rte::cmd_result_t::create(RTE_STATUS_CODE_ERROR), std::move(cmd)); } else { - rte.return_string(RTE_STATUS_CODE_OK, "ok", std::move(cmd)); + rte.return_result(rte::cmd_result_t::create(RTE_STATUS_CODE_OK), std::move(cmd)); } } else { - rte.return_string(RTE_STATUS_CODE_OK, "unregistered cmd", std::move(cmd)); + rte.return_result(rte::cmd_result_t::create(RTE_STATUS_CODE_OK), std::move(cmd)); } } @@ -131,7 +128,7 @@ class azure_tts_extension_t : public rte::extension_t { // - name: text_data // example: // {"name": "text_data", "properties": {"text": "hello"} - void on_data(rte::rte_t &rte, std::unique_ptr data) override { + void on_data(rte::rte_env_t &rte, std::unique_ptr data) override { auto text = data->get_property_string(kDataFieldText.c_str()); if (text.empty()) { @@ -145,12 +142,13 @@ class azure_tts_extension_t : public rte::extension_t { } // on_stop will be called when the extension is stopping. - void on_stop(rte::rte_t &rte) override { + void on_stop(rte::rte_env_t &rte) override { AZURE_TTS_LOGI("stop"); if (azure_tts_) { azure_tts_->Stop(); azure_tts_ = nullptr; } + rte_proxy_.reset(); // Extension stop. rte.on_stop_done(); @@ -158,7 +156,7 @@ class azure_tts_extension_t : public rte::extension_t { } private: - std::unique_ptr rte_proxy_; + std::unique_ptr rte_proxy_; std::unique_ptr azure_tts_; @@ -166,6 +164,6 @@ class azure_tts_extension_t : public rte::extension_t { const std::string kDataFieldText{"text"}; }; -RTE_CXX_REGISTER_ADDON_AS_EXTENSION(azure_tts, azure_tts_extension_t); +RTE_CPP_REGISTER_ADDON_AS_EXTENSION(azure_tts, azure_tts_extension_t); } // namespace azure_tts_extension diff --git a/agents/addon/extension/azure_tts/src/tts.cc b/agents/addon/extension/azure_tts/src/tts.cc index 4bcd1ce7..6da75544 100644 --- a/agents/addon/extension/azure_tts/src/tts.cc +++ b/agents/addon/extension/azure_tts/src/tts.cc @@ -36,18 +36,14 @@ bool AzureTTS::Start() { { std::unique_lock lk(tasks_mutex_); - tasks_cv_.wait(lk, [this]() { return !tasks_.empty(); }); - if (tasks_.empty()) { - continue; + tasks_cv_.wait(lk, [this]() { return stop_.load() || !tasks_.empty(); }); + if (stop_.load()) { + break; } task = std::move(tasks_.front()); tasks_.pop(); } - if (stop_.load()) { - break; - } - SpeechText(task->text, task->ts); } diff --git a/agents/addon/extension/bedrock_llm_python/README.md b/agents/addon/extension/bedrock_llm_python/README.md new file mode 100644 index 00000000..7d9bd713 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/README.md @@ -0,0 +1,12 @@ +## Amazon Bedrock LLM Extension + +### Configurations + +You can config this extension by providing following environments: + +| Env | Required | Default | Notes | +| -- | -- | -- | -- | +| AWS_REGION | No | us-east-1 | The Region of Amazon Bedrock service you want to use. 
| +| AWS_ACCESS_KEY_ID | No | - | Access Key of your IAM User, make sure you've set proper permissions to [invoke Bedrock models](https://docs.aws.amazon.com/bedrock/latest/userguide/security_iam_id-based-policy-examples.html) and gain [models access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) in Bedrock. Will use default credentials provider if not provided. Check [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | +| AWS_SECRET_ACCESS_KEY | No | - | Secret Key of your IAM User, make sure you've set proper permissions to [invoke Bedrock models](https://docs.aws.amazon.com/bedrock/latest/userguide/security_iam_id-based-policy-examples.html) and gain [models access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) in Bedrock. Will use default credentials provider if not provided. Check [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | +| AWS_BEDROCK_MODEL | No | Claude 3.5(anthropic.claude-3-5-sonnet-20240620-v1:0) | Bedrock model id, check [docuement](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns). | \ No newline at end of file diff --git a/agents/addon/extension/bedrock_llm_python/__init__.py b/agents/addon/extension/bedrock_llm_python/__init__.py new file mode 100644 index 00000000..4e39b9dc --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/__init__.py @@ -0,0 +1,4 @@ +from . import bedrock_llm_extension +from .log import logger + +logger.info("bedrock_llm_python extension loaded") diff --git a/agents/addon/extension/bedrock_llm_python/bedrock_llm.py b/agents/addon/extension/bedrock_llm_python/bedrock_llm.py new file mode 100644 index 00000000..c833f7b7 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/bedrock_llm.py @@ -0,0 +1,75 @@ +import boto3 +from .log import logger + +class BedrockLLMConfig: + def __init__(self, + region: str, + access_key: str, + secret_key: str, + model: str, + prompt: str, + top_p: float, + temperature: float, + max_tokens: int): + self.region = region + self.access_key = access_key + self.secret_key = secret_key + self.model = model + self.prompt = prompt + self.top_p = top_p + self.temperature = temperature + self.max_tokens = max_tokens + + @classmethod + def default_config(cls): + return cls( + region="us-east-1", + access_key="", + secret_key="", + model="anthropic.claude-3-5-sonnet-20240620-v1:0", # Defaults to Claude 3.5, supported model list: https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html + # system prompt + prompt="You are a voice assistant who talks in a conversational way and can chat with me like my friends. I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. 
When giving me a list of options, express the options in a narrative way instead of bullet points.", + top_p=1.0, + temperature=0.1, + max_tokens=512, + ) + +class BedrockLLM: + client = None + def __init__(self, config: BedrockLLMConfig): + self.config = config + + if config.access_key and config.secret_key: + logger.info(f"BedrockLLM initialized with access key: {config.access_key}") + + self.client = boto3.client(service_name='bedrock-runtime', + region_name=config.region, + aws_access_key_id=config.access_key, + aws_secret_access_key=config.secret_key) + else: + logger.info(f"BedrockLLM initialized without access key, using default credentials provider chain.") + self.client = boto3.client(service_name='bedrock-runtime', region_name=config.region) + + def get_converse_stream(self, messages): + bedrock_req_params = { + "modelId": self.config.model, + "messages": messages, + "inferenceConfig": { + "temperature": self.config.temperature, + "maxTokens": self.config.max_tokens, + "topP": self.config.top_p, + # "stopSequences": [], + }, + # "additionalModelRequestFields": additional_model_fields, + } + + if self.config.prompt: + bedrock_req_params['system'] = [ + {'text': self.config.prompt} + ] + + try: + response = self.client.converse_stream(**bedrock_req_params) + return response + except Exception as e: + raise Exception(f"GetConverseStream failed, err: {e}") \ No newline at end of file diff --git a/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py new file mode 100644 index 00000000..e8dd6472 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py @@ -0,0 +1,426 @@ +from .bedrock_llm import BedrockLLM, BedrockLLMConfig +from datetime import datetime +from threading import Thread +from rte import ( + Addon, + Extension, + register_addon_as_extension, + RteEnv, + Cmd, + Data, + StatusCode, + CmdResult, + MetadataInfo, +) +from .log import logger + + +CMD_IN_FLUSH = "flush" +CMD_OUT_FLUSH = "flush" +DATA_IN_TEXT_DATA_PROPERTY_TEXT = "text" +DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL = "is_final" +DATA_OUT_TEXT_DATA_PROPERTY_TEXT = "text" +DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT = "end_of_segment" + +PROPERTY_REGION = "region" # Optional +PROPERTY_ACCESS_KEY = "access_key" # Optional +PROPERTY_SECRET_KEY = "secret_key" # Optional +PROPERTY_MODEL = "model" # Optional +PROPERTY_PROMPT = "prompt" # Optional +PROPERTY_TEMPERATURE = "temperature" # Optional +PROPERTY_TOP_P = "top_p" # Optional +PROPERTY_MAX_TOKENS = "max_tokens" # Optional +PROPERTY_GREETING = "greeting" # Optional +PROPERTY_MAX_MEMORY_LENGTH = "max_memory_length" # Optional + + +def get_current_time(): + # Get the current time + start_time = datetime.now() + # Get the number of microseconds since the Unix epoch + unix_microseconds = int(start_time.timestamp() * 1_000_000) + return unix_microseconds + + +def is_punctuation(char): + if char in [",", ",", ".", "。", "?", "?", "!", "!"]: + return True + return False + + +def parse_sentence(sentence, content): + remain = "" + found_punc = False + + for char in content: + if not found_punc: + sentence += char + else: + remain += char + + if not found_punc and is_punctuation(char): + found_punc = True + + return sentence, remain, found_punc + + +class BedrockLLMExtension(Extension): + memory = [] + max_memory_length = 10 + outdate_ts = 0 + bedrock_llm = None + + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: + 
+        logger.info("BedrockLLMExtension on_init")
+        rte.on_init_done(manifest, property)
+
+    def on_start(self, rte: RteEnv) -> None:
+        logger.info("BedrockLLMExtension on_start")
+        # Prepare configuration
+        bedrock_llm_config = BedrockLLMConfig.default_config()
+
+        try:
+            region = rte.get_property_string(PROPERTY_REGION)
+            if region:
+                bedrock_llm_config.region = region
+        except Exception as err:
+            # region is optional; fall through to the default instead of aborting on_start
+            logger.debug(
+                f"GetProperty optional {PROPERTY_REGION} failed, err: {err}. Using default value: {bedrock_llm_config.region}"
+            )
+
+        try:
+            access_key = rte.get_property_string(PROPERTY_ACCESS_KEY)
+            bedrock_llm_config.access_key = access_key
+        except Exception as err:
+            # access_key is optional; the default credentials provider chain is used when absent
+            logger.error(
+                f"GetProperty optional {PROPERTY_ACCESS_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.access_key}"
+            )
+
+        try:
+            secret_key = rte.get_property_string(PROPERTY_SECRET_KEY)
+            bedrock_llm_config.secret_key = secret_key
+        except Exception as err:
+            # secret_key is optional; the default credentials provider chain is used when absent
+            logger.error(
+                f"GetProperty optional {PROPERTY_SECRET_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.secret_key}"
+            )
+
+        try:
+            model = rte.get_property_string(PROPERTY_MODEL)
+            if model:
+                bedrock_llm_config.model = model
+        except Exception as err:
+            logger.debug(
+                f"GetProperty optional {PROPERTY_MODEL} error: {err}. Using default value: {bedrock_llm_config.model}"
+            )
+
+        try:
+            prompt = rte.get_property_string(PROPERTY_PROMPT)
+            if prompt:
+                bedrock_llm_config.prompt = prompt
+        except Exception as err:
+            logger.debug(
+                f"GetProperty optional {PROPERTY_PROMPT} error: {err}. Using default value: {bedrock_llm_config.prompt}"
+            )
+
+        try:
+            temperature = rte.get_property_float(PROPERTY_TEMPERATURE)
+            bedrock_llm_config.temperature = float(temperature)
+        except Exception as err:
+            logger.debug(
+                f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}. Using default value: {bedrock_llm_config.temperature}"
+            )
+
+        try:
+            top_p = rte.get_property_float(PROPERTY_TOP_P)
+            bedrock_llm_config.top_p = float(top_p)
+        except Exception as err:
+            logger.debug(
+                f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}. Using default value: {bedrock_llm_config.top_p}"
+            )
+
+        try:
+            max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS)
+            if max_tokens > 0:
+                bedrock_llm_config.max_tokens = int(max_tokens)
+        except Exception as err:
+            logger.debug(
+                f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}. Using default value: {bedrock_llm_config.max_tokens}"
+            )
+
+        greeting = None  # default when the optional greeting property is missing
+        try:
+            greeting = rte.get_property_string(PROPERTY_GREETING)
+        except Exception as err:
+            logger.debug(
+                f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}."
+            )
+
+        try:
+            prop_max_memory_length = rte.get_property_int(PROPERTY_MAX_MEMORY_LENGTH)
+            if prop_max_memory_length > 0:
+                self.max_memory_length = int(prop_max_memory_length)
+        except Exception as err:
+            logger.debug(
+                f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}."
+ ) + + # Create bedrockLLM instance + try: + self.bedrock_llm = BedrockLLM(bedrock_llm_config) + logger.info( + f"newBedrockLLM succeed with max_tokens: {bedrock_llm_config.max_tokens}, model: {bedrock_llm_config.model}" + ) + except Exception as err: + logger.info(f"newBedrockLLM failed, err: {err}") + + # Send greeting if available + if greeting: + try: + output_data = Data.create("text_data") + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True + ) + rte.send_data(output_data) + logger.info(f"greeting [{greeting}] sent") + except Exception as err: + logger.info(f"greeting [{greeting}] send failed, err: {err}") + rte.on_start_done() + + def on_stop(self, rte: RteEnv) -> None: + logger.info("BedrockLLMExtension on_stop") + rte.on_stop_done() + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("BedrockLLMExtension on_deinit") + rte.on_deinit_done() + + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: + logger.info("BedrockLLMExtension on_cmd") + cmd_json = cmd.to_json() + logger.info("BedrockLLMExtension on_cmd json: " + cmd_json) + + cmd_name = cmd.get_name() + + if cmd_name == CMD_IN_FLUSH: + self.outdate_ts = get_current_time() + cmd_out = Cmd.create(CMD_OUT_FLUSH) + rte.send_cmd(cmd_out, None) + logger.info(f"BedrockLLMExtension on_cmd sent flush") + else: + logger.info(f"BedrockLLMExtension on_cmd unknown cmd: {cmd_name}") + cmd_result = CmdResult.create(StatusCode.ERROR) + cmd_result.set_property_string("detail", "unknown cmd") + rte.return_result(cmd_result, cmd) + return + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + rte.return_result(cmd_result, cmd) + + def on_data(self, rte: RteEnv, data: Data) -> None: + """ + on_data receives data from rte graph. + current supported data: + - name: text_data + example: + {name: text_data, properties: {text: "hello"} + """ + logger.info(f"BedrockLLMExtension on_data") + + # Assume 'data' is an object from which we can get properties + try: + is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) + if not is_final: + logger.info("ignore non-final input") + return + except Exception as err: + logger.info( + f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" + ) + return + + # Get input text + try: + input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) + if not input_text: + logger.info("ignore empty text") + return + logger.info(f"OnData input text: [{input_text}]") + except Exception as err: + logger.info( + f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" + ) + return + + # Prepare memory. A conversation must alternate between user and assistant roles + while len(self.memory): + if len(self.memory) > self.max_memory_length: + logger.debug( + f"pop out first message, reason: memory length limit: `{self.memory[0]}`" + ) + self.memory.pop(0) + elif self.memory[0]["role"] == "assistant": + logger.debug( + f"pop out first message, reason: messages can not start with assistant: `{self.memory[0]}`" + ) + self.memory.pop(0) + else: + break + + if len(self.memory) and self.memory[-1]["role"] == "user": + # if last user input got empty response, append current user input. 
+ logger.debug( + f"found last message with role `user`, will append this input into last user input" + ) + self.memory[-1]["content"].append({"text": input_text}) + else: + self.memory.append({"role": "user", "content": [{"text": input_text}]}) + + def converse_stream_worker(start_time, input_text, memory): + try: + logger.info( + f"GetConverseStream for input text: [{input_text}] memory: {memory}" + ) + + # Get result from Bedrock + resp = self.bedrock_llm.get_converse_stream(memory) + if resp is None or resp.get("stream") is None: + logger.info( + f"GetConverseStream for input text: [{input_text}] failed" + ) + return + + stream = resp.get("stream") + sentence = "" + full_content = "" + first_sentence_sent = False + + for event in stream: + if start_time < self.outdate_ts: + logger.info( + f"GetConverseStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}" + ) + break + + if "contentBlockDelta" in event: + delta_types = event["contentBlockDelta"]["delta"].keys() + # ignore other types of content: e.g toolUse + if "text" in delta_types: + content = event["contentBlockDelta"]["delta"]["text"] + elif ( + "internalServerException" in event + or "modelStreamErrorException" in event + or "throttlingException" in event + or "validationException" in event + ): + logger.error(f"GetConverseStream Error occured: {event}") + break + else: + # ingore other events + continue + + full_content += content + + while True: + sentence, content, sentence_is_final = parse_sentence( + sentence, content + ) + if len(sentence) == 0 or not sentence_is_final: + logger.info(f"sentence {sentence} is empty or not final") + break + logger.info( + f"GetConverseStream recv for input text: [{input_text}] got sentence: [{sentence}]" + ) + + # send sentence + try: + output_data = Data.create("text_data") + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False + ) + rte.send_data(output_data) + logger.info( + f"GetConverseStream recv for input text: [{input_text}] sent sentence [{sentence}]" + ) + except Exception as err: + logger.info( + f"GetConverseStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}" + ) + break + + sentence = "" + if not first_sentence_sent: + first_sentence_sent = True + logger.info( + f"GetConverseStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms" + ) + + if len(full_content.strip()): + # remember response as assistant content in memory + memory.append( + {"role": "assistant", "content": [{"text": full_content}]} + ) + else: + # can not put empty model response into memory + logger.error( + f"GetConverseStream recv for input text: [{input_text}] failed: empty response [{full_content}]" + ) + return + + # send end of segment + try: + output_data = Data.create("text_data") + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True + ) + rte.send_data(output_data) + logger.info( + f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent" + ) + except Exception as err: + logger.info( + f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}" + ) + + except Exception as e: + logger.info( + 
f"GetConverseStream for input text: [{input_text}] failed, err: {e}" + ) + + # Start thread to request and read responses from OpenAI + start_time = get_current_time() + thread = Thread( + target=converse_stream_worker, args=(start_time, input_text, self.memory) + ) + thread.start() + logger.info(f"BedrockLLMExtension on_data end") + + +@register_addon_as_extension("bedrock_llm_python") +class BedrockLLMExtensionAddon(Addon): + def on_init(self, rte: RteEnv, manifest, property) -> None: + logger.info("BedrockLLMExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: + logger.info("on_create_instance") + rte.on_create_instance_done(BedrockLLMExtension(addon_name), context) + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("BedrockLLMExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/bedrock_llm_python/log.py b/agents/addon/extension/bedrock_llm_python/log.py new file mode 100644 index 00000000..7261cd01 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("bedrock_llm_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/bedrock_llm_python/manifest.json b/agents/addon/extension/bedrock_llm_python/manifest.json new file mode 100644 index 00000000..7f912581 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/manifest.json @@ -0,0 +1,68 @@ +{ + "type": "extension", + "name": "bedrock_llm_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + } + ], + "api": { + "property": { + "access_key": { + "type": "string" + }, + "secret_key": { + "type": "string" + }, + "model": { + "type": "string" + }, + "max_tokens": { + "type": "int64" + }, + "prompt": { + "type": "string" + }, + "greeting": { + "type": "string" + }, + "max_memory_length": { + "type": "int64" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/bedrock_llm_python/property.json b/agents/addon/extension/bedrock_llm_python/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/bedrock_llm_python/requirements.txt b/agents/addon/extension/bedrock_llm_python/requirements.txt new file mode 100644 index 00000000..0a92c2be --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/requirements.txt @@ -0,0 +1 @@ +boto3==1.34.143 \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber/extension.go b/agents/addon/extension/chat_transcriber/extension.go index 33fba48a..d684e7c6 100644 --- a/agents/addon/extension/chat_transcriber/extension.go +++ b/agents/addon/extension/chat_transcriber/extension.go @@ -13,7 +13,7 @@ import 
( "log/slog" "time" - "agora.io/rte/rtego" + "agora.io/rte/rte" "google.golang.org/protobuf/proto" ) @@ -29,12 +29,12 @@ var ( ) type chatTranscriberExtension struct { - rtego.DefaultExtension + rte.DefaultExtension cachedTextMap map[uint32]string // record the cached text data for each stream id } -func newExtension(name string) rtego.Extension { +func newExtension(name string) rte.Extension { return &chatTranscriberExtension{ cachedTextMap: make(map[uint32]string), } @@ -46,8 +46,8 @@ func newExtension(name string) rtego.Extension { // example: // {"name": "text_data", "properties": {"text": "hello", "is_final": true, "stream_id": 123, "end_of_segment": true}} func (p *chatTranscriberExtension) OnData( - rte rtego.Rte, - data rtego.Data, + rteEnv rte.RteEnv, + data rte.Data, ) { // Get the text data from data. text, err := data.GetPropertyString(textDataTextField) @@ -126,22 +126,22 @@ func (p *chatTranscriberExtension) OnData( } // convert the origin text data to the protobuf data and send it to the graph. - rteData, err := rtego.NewData("data") + rteData, err := rte.NewData("data") rteData.SetPropertyBytes("data", pbData) if err != nil { slog.Warn(fmt.Sprintf("OnData NewData error: %v", err), logTag) return } - rte.SendData(rteData) + rteEnv.SendData(rteData) } func init() { slog.Info("chat_transcriber extension init", logTag) // Register addon - rtego.RegisterAddonAsExtension( + rte.RegisterAddonAsExtension( "chat_transcriber", - rtego.NewDefaultExtensionAddon(newExtension), + rte.NewDefaultExtensionAddon(newExtension), ) } diff --git a/agents/addon/extension/chat_transcriber/manifest.json b/agents/addon/extension/chat_transcriber/manifest.json index 269a5a53..8d3470fd 100644 --- a/agents/addon/extension/chat_transcriber/manifest.json +++ b/agents/addon/extension/chat_transcriber/manifest.json @@ -4,15 +4,10 @@ "version": "0.1.0", "language": "go", "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, { "type": "system", "name": "rte_runtime_go", - "version": "0.1.0" + "version": "0.3.1" } ], "api": { diff --git a/agents/addon/extension/chat_transcriber_python/__init__.py b/agents/addon/extension/chat_transcriber_python/__init__.py new file mode 100644 index 00000000..3a570f49 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/__init__.py @@ -0,0 +1,4 @@ +from . 
import chat_transcriber_addon +from .log import logger + +logger.info("chat_transcriber_python extension loaded") diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py new file mode 100644 index 00000000..9e141265 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py @@ -0,0 +1,26 @@ +from rte import ( + Addon, + register_addon_as_extension, + RteEnv, +) +from .log import logger + + +@register_addon_as_extension("chat_transcriber_python") +class ChatTranscriberExtensionAddon(Addon): + def on_init(self, rte: RteEnv, manifest, property) -> None: + logger.info("on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: + logger.info("on_create_instance") + + from .chat_transcriber_extension import ChatTranscriberExtension + + rte.on_create_instance_done(ChatTranscriberExtension(addon_name), context) + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py new file mode 100644 index 00000000..de8a3dc6 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py @@ -0,0 +1,147 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# + +import json +from rte import ( + Extension, + RteEnv, + Cmd, + Data, + StatusCode, + CmdResult, + MetadataInfo, +) +import time +from .pb import chat_text_pb2 as pb +from .log import logger + +CMD_NAME_FLUSH = "flush" + +TEXT_DATA_TEXT_FIELD = "text" +TEXT_DATA_FINAL_FIELD = "is_final" +TEXT_DATA_STREAM_ID_FIELD = "stream_id" +TEXT_DATA_END_OF_SEGMENT_FIELD = "end_of_segment" + +# record the cached text data for each stream id +cached_text_map = {} + + +class ChatTranscriberExtension(Extension): + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: + logger.info("on_init") + rte.on_init_done(manifest, property) + + def on_start(self, rte: RteEnv) -> None: + logger.info("on_start") + rte.on_start_done() + + def on_stop(self, rte: RteEnv) -> None: + logger.info("on_stop") + rte.on_stop_done() + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("on_deinit") + rte.on_deinit_done() + + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: + logger.info("on_cmd") + cmd_json = cmd.to_json() + logger.info("on_cmd json: {}".format(cmd_json)) + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + rte.return_result(cmd_result, cmd) + + def on_data(self, rte: RteEnv, data: Data) -> None: + """ + on_data receives data from rte graph. 
+ current supported data: + - name: text_data + example: + {"name": "text_data", "properties": {"text": "hello", "is_final": true, "stream_id": 123, "end_of_segment": true}} + """ + logger.info(f"on_data") + + try: + text = data.get_property_string(TEXT_DATA_TEXT_FIELD) + except Exception as e: + logger.exception( + f"on_data get_property_string {TEXT_DATA_TEXT_FIELD} error: {e}" + ) + return + + try: + final = data.get_property_bool(TEXT_DATA_FINAL_FIELD) + except Exception as e: + logger.exception( + f"on_data get_property_bool {TEXT_DATA_FINAL_FIELD} error: {e}" + ) + return + + try: + stream_id = data.get_property_int(TEXT_DATA_STREAM_ID_FIELD) + except Exception as e: + logger.exception( + f"on_data get_property_int {TEXT_DATA_STREAM_ID_FIELD} error: {e}" + ) + return + + try: + end_of_segment = data.get_property_bool(TEXT_DATA_END_OF_SEGMENT_FIELD) + except Exception as e: + logger.exception( + f"on_data get_property_bool {TEXT_DATA_END_OF_SEGMENT_FIELD} error: {e}" + ) + return + + logger.debug( + f"on_data {TEXT_DATA_TEXT_FIELD}: {text} {TEXT_DATA_FINAL_FIELD}: {final} {TEXT_DATA_STREAM_ID_FIELD}: {stream_id} {TEXT_DATA_END_OF_SEGMENT_FIELD}: {end_of_segment}" + ) + + # We cache all final text data and append the non-final text data to the cached data + # until the end of the segment. + if end_of_segment: + if stream_id in cached_text_map: + text = cached_text_map[stream_id] + text + del cached_text_map[stream_id] + else: + if final: + if stream_id in cached_text_map: + text = cached_text_map[stream_id] + text + + cached_text_map[stream_id] = text + + pb_text = pb.Text( + uid=stream_id, + data_type="transcribe", + texttime=int(time.time() * 1000), # Convert to milliseconds + words=[ + pb.Word( + text=text, + is_final=end_of_segment, + ), + ], + ) + + try: + pb_serialized_text = pb_text.SerializeToString() + except Exception as e: + logger.warning(f"on_data SerializeToString error: {e}") + return + + try: + # convert the origin text data to the protobuf data and send it to the graph. 
+ rte_data = Data.create("data") + rte_data.set_property_buf("data", pb_serialized_text) + rte.send_data(rte_data) + logger.info("data sent") + except Exception as e: + logger.warning(f"on_data new_data error: {e}") + return diff --git a/agents/addon/extension/chat_transcriber_python/log.py b/agents/addon/extension/chat_transcriber_python/log.py new file mode 100644 index 00000000..e30358c1 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("chat_transcriber_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/chat_transcriber_python/manifest.json b/agents/addon/extension/chat_transcriber_python/manifest.json new file mode 100644 index 00000000..f64295b6 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/manifest.json @@ -0,0 +1,40 @@ +{ + "type": "extension", + "name": "chat_transcriber_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + } + ], + "api": { + "property": {}, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + }, + "stream_id": { + "type": "uint32" + }, + "end_of_segment": { + "type": "bool" + } + } + } + ], + "data_out": [ + { + "name": "data" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber_python/pb/chat_text.proto b/agents/addon/extension/chat_transcriber_python/pb/chat_text.proto new file mode 100644 index 00000000..c770eb49 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/pb/chat_text.proto @@ -0,0 +1,37 @@ +syntax = "proto3"; + +package agora.chat_transcriber_python; +option go_package = ".;pb"; + +message Text { + int32 vendor = 1; + int32 version = 2; + int32 seqnum = 3; + int32 uid = 4; + int32 flag = 5; + int64 time = 6; // final time =first nofinal time + int32 lang = 7; + int32 starttime = 8; + int32 offtime = 9; + repeated Word words = 10; + bool end_of_segment = 11; + int32 duration_ms = 12; + string data_type = 13; // transcribe ,translate + repeated Translation trans = 14; + string culture = 15; + int64 texttime = 16; // pkg timestamp +} + +message Word { + string text = 1; + int32 start_ms = 2; + int32 duration_ms = 3; + bool is_final = 4; + double confidence = 5; +} + +message Translation { + bool is_final = 1; + string lang = 2; + repeated string texts = 3; +} \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber_python/pb/chat_text_pb2.py b/agents/addon/extension/chat_transcriber_python/pb/chat_text_pb2.py new file mode 100644 index 00000000..17142fc8 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/pb/chat_text_pb2.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: chat_text.proto +# Protobuf Python Version: 5.27.2 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 5, + 27, + 2, + '', + 'chat_text.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x63hat_text.proto\x12\x1d\x61gora.chat_transcriber_python\"\xe4\x02\n\x04Text\x12\x0e\n\x06vendor\x18\x01 \x01(\x05\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12\x0e\n\x06seqnum\x18\x03 \x01(\x05\x12\x0b\n\x03uid\x18\x04 \x01(\x05\x12\x0c\n\x04\x66lag\x18\x05 \x01(\x05\x12\x0c\n\x04time\x18\x06 \x01(\x03\x12\x0c\n\x04lang\x18\x07 \x01(\x05\x12\x11\n\tstarttime\x18\x08 \x01(\x05\x12\x0f\n\x07offtime\x18\t \x01(\x05\x12\x32\n\x05words\x18\n \x03(\x0b\x32#.agora.chat_transcriber_python.Word\x12\x16\n\x0e\x65nd_of_segment\x18\x0b \x01(\x08\x12\x13\n\x0b\x64uration_ms\x18\x0c \x01(\x05\x12\x11\n\tdata_type\x18\r \x01(\t\x12\x39\n\x05trans\x18\x0e \x03(\x0b\x32*.agora.chat_transcriber_python.Translation\x12\x0f\n\x07\x63ulture\x18\x0f \x01(\t\x12\x10\n\x08texttime\x18\x10 \x01(\x03\"a\n\x04Word\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x10\n\x08start_ms\x18\x02 \x01(\x05\x12\x13\n\x0b\x64uration_ms\x18\x03 \x01(\x05\x12\x10\n\x08is_final\x18\x04 \x01(\x08\x12\x12\n\nconfidence\x18\x05 \x01(\x01\"<\n\x0bTranslation\x12\x10\n\x08is_final\x18\x01 \x01(\x08\x12\x0c\n\x04lang\x18\x02 \x01(\t\x12\r\n\x05texts\x18\x03 \x03(\tB\x06Z\x04.;pbb\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'chat_text_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals['DESCRIPTOR']._loaded_options = None + _globals['DESCRIPTOR']._serialized_options = b'Z\004.;pb' + _globals['_TEXT']._serialized_start=51 + _globals['_TEXT']._serialized_end=407 + _globals['_WORD']._serialized_start=409 + _globals['_WORD']._serialized_end=506 + _globals['_TRANSLATION']._serialized_start=508 + _globals['_TRANSLATION']._serialized_end=568 +# @@protoc_insertion_point(module_scope) diff --git a/agents/addon/extension/chat_transcriber_python/property.json b/agents/addon/extension/chat_transcriber_python/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber_python/requirements.txt b/agents/addon/extension/chat_transcriber_python/requirements.txt new file mode 100644 index 00000000..bcc3fd65 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/requirements.txt @@ -0,0 +1,2 @@ +protobuf==5.27.2 +google==3.0.0 \ No newline at end of file diff --git a/agents/addon/extension/cosy_tts/__init__.py b/agents/addon/extension/cosy_tts/__init__.py new file mode 100644 index 00000000..d7a1c8ec --- /dev/null +++ b/agents/addon/extension/cosy_tts/__init__.py @@ -0,0 +1,3 @@ +from . 
import cosy_tts_addon + +print("cosy_tts extension loaded") diff --git a/agents/addon/extension/cosy_tts/cosy_tts_addon.py b/agents/addon/extension/cosy_tts/cosy_tts_addon.py new file mode 100644 index 00000000..b9135b3e --- /dev/null +++ b/agents/addon/extension/cosy_tts/cosy_tts_addon.py @@ -0,0 +1,26 @@ +from rte import ( + Addon, + register_addon_as_extension, + RteEnv, +) +from .log import logger + + +@register_addon_as_extension("cosy_tts") +class CosyTTSExtensionAddon(Addon): + def on_init(self, rte: RteEnv, manifest, property) -> None: + logger.info("CosyTTSExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: + logger.info("on_create_instance") + + from .cosy_tts_extension import CosyTTSExtension + + rte.on_create_instance_done(CosyTTSExtension(addon_name), context) + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("CosyTTSExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/cosy_tts/cosy_tts_extension.py b/agents/addon/extension/cosy_tts/cosy_tts_extension.py new file mode 100644 index 00000000..79ecfcd8 --- /dev/null +++ b/agents/addon/extension/cosy_tts/cosy_tts_extension.py @@ -0,0 +1,254 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +import traceback +from rte import ( + Extension, + RteEnv, + Cmd, + PcmFrame, + RTE_PCM_FRAME_DATA_FMT, + Data, + StatusCode, + CmdResult, + MetadataInfo, +) +from typing import List, Any +import dashscope +import queue +import threading +from datetime import datetime +from dashscope.audio.tts_v2 import ResultCallback, SpeechSynthesizer, AudioFormat +from .log import logger + + +class CosyTTSCallback(ResultCallback): + _player = None + _stream = None + + def __init__(self, rte: RteEnv, sample_rate: int): + super().__init__() + self.rte = rte + self.sample_rate = sample_rate + self.frame_size = int(self.sample_rate * 1 * 2 / 100) + self.canceled = False + self.closed = False + + def on_open(self): + logger.info("websocket is open.") + + def on_complete(self): + logger.info("speech synthesis task complete successfully.") + + def on_error(self, message: str): + logger.info(f"speech synthesis task failed, {message}") + + def on_close(self): + logger.info("websocket is closed.") + self.closed = True + + def on_event(self, message): + pass + # logger.info(f"recv speech synthsis message {message}") + + def get_frame(self, data: bytes) -> PcmFrame: + f = PcmFrame.create("pcm_frame") + f.set_sample_rate(self.sample_rate) + f.set_bytes_per_sample(2) + f.set_number_of_channels(1) + # f.set_timestamp = 0 + f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_INTERLEAVE) + f.set_samples_per_channel(self.sample_rate // 100) + f.alloc_buf(self.frame_size) + buff = f.lock_buf() + if len(data) < self.frame_size: + buff[:] = bytes(self.frame_size) # fill with 0 + buff[: len(data)] = data + f.unlock_buf(buff) + return f + + def cancel(self) -> None: + self.canceled = True + + def on_data(self, data: bytes) -> None: + if self.canceled: + return + + # logger.info("audio result length: %d, %d", len(data), self.frame_size) + try: + chunk = int(len(data) / self.frame_size) + offset = 0 + for i in range(0, chunk): + if self.canceled: + return + f = self.get_frame(data[offset : offset + self.frame_size]) + self.rte.send_pcm_frame(f) + offset += self.frame_size + + if self.canceled: + return + if offset < len(data): + size = 
len(data) - offset + f = self.get_frame(data[offset : offset + size]) + self.rte.send_pcm_frame(f) + except Exception as e: + logger.exception(e) + + +class CosyTTSExtension(Extension): + def __init__(self, name: str): + super().__init__(name) + self.api_key = "" + self.voice = "" + self.model = "" + self.sample_rate = 16000 + self.tts = None + self.callback = None + self.format = None + self.outdateTs = datetime.now() + + self.stopped = False + self.thread = None + self.queue = queue.Queue() + self.mutex = threading.Lock() + + def on_init(self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo) -> None: + logger.info("CosyTTSExtension on_init") + rte.on_init_done(manifest, property) + + def on_start(self, rte: RteEnv) -> None: + logger.info("CosyTTSExtension on_start") + self.api_key = rte.get_property_string("api_key") + self.voice = rte.get_property_string("voice") + self.model = rte.get_property_string("model") + self.sample_rate = rte.get_property_int("sample_rate") + + dashscope.api_key = self.api_key + f = AudioFormat.PCM_16000HZ_MONO_16BIT + if self.sample_rate == 8000: + f = AudioFormat.PCM_8000HZ_MONO_16BIT + elif self.sample_rate == 16000: + f = AudioFormat.PCM_16000HZ_MONO_16BIT + elif self.sample_rate == 22050: + f = AudioFormat.PCM_22050HZ_MONO_16BIT + elif self.sample_rate == 24000: + f = AudioFormat.PCM_24000HZ_MONO_16BIT + elif self.sample_rate == 44100: + f = AudioFormat.PCM_44100HZ_MONO_16BIT + elif self.sample_rate == 48000: + f = AudioFormat.PCM_48000HZ_MONO_16BIT + else: + logger.info("unknown sample rate %d", self.sample_rate) + exit() + + self.format = f + + self.thread = threading.Thread(target=self.async_handle, args=[rte]) + self.thread.start() + rte.on_start_done() + + def on_stop(self, rte: RteEnv) -> None: + logger.info("CosyTTSExtension on_stop") + + self.stopped = True + self.queue.put(None) + self.flush() + self.thread.join() + rte.on_stop_done() + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("CosyTTSExtension on_deinit") + rte.on_deinit_done() + + def need_interrupt(self, ts: datetime.time) -> bool: + return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1 + + def async_handle(self, rte: RteEnv): + try: + tts = None + callback = None + while not self.stopped: + try: + value = self.queue.get() + if value is None: + break + inputText, ts = value + if len(inputText) == 0: + logger.warning("empty input for interrupt") + if tts is not None: + try: + tts.streaming_cancel() + except Exception as e: + logger.exception(e) + if callback is not None: + callback.cancel() + tts = None + callback = None + continue + + if self.need_interrupt(ts): + continue + + if callback is not None and callback.closed is True: + tts = None + + if tts is None: + logger.info("creating tts") + callback = CosyTTSCallback(rte, self.sample_rate) + tts = SpeechSynthesizer( + model=self.model, + voice=self.voice, + format=self.format, + callback=callback, + ) + + logger.info("on message %s", inputText) + tts.streaming_call(inputText) + except Exception as e: + logger.exception(e) + logger.exception(traceback.format_exc()) + finally: + if tts is not None: + tts.streaming_complete() + + def flush(self): + logger.info("CosyTTSExtension flush") + while not self.queue.empty(): + self.queue.get() + self.queue.put(("", datetime.now())) + + def on_data(self, rte: RteEnv, data: Data) -> None: + logger.info("CosyTTSExtension on_data") + inputText = data.get_property_string("text") + if len(inputText) == 0: + logger.info("ignore empty text") + return + + is_end = 
data.get_property_bool("end_of_segment") + + logger.info("on data %s %d", inputText, is_end) + self.queue.put((inputText, datetime.now())) + + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: + logger.info("CosyTTSExtension on_cmd") + cmd_json = cmd.to_json() + logger.info("CosyTTSExtension on_cmd json: %s" + cmd_json) + + cmdName = cmd.get_name() + if cmdName == "flush": + self.outdateTs = datetime.now() + self.flush() + cmd_out = Cmd.create("flush") + rte.send_cmd( + cmd_out, lambda rte, result: print("DefaultExtension send_cmd done") + ) + else: + logger.info("unknown cmd %s", cmdName) + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + rte.return_result(cmd_result, cmd) diff --git a/agents/addon/extension/cosy_tts/log.py b/agents/addon/extension/cosy_tts/log.py new file mode 100644 index 00000000..83e89596 --- /dev/null +++ b/agents/addon/extension/cosy_tts/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("COSY_TTS") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/cosy_tts/manifest.json b/agents/addon/extension/cosy_tts/manifest.json new file mode 100644 index 00000000..8690db03 --- /dev/null +++ b/agents/addon/extension/cosy_tts/manifest.json @@ -0,0 +1,54 @@ +{ + "type": "extension", + "name": "cosy_tts", + "version": "0.2.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "voice": { + "type": "string" + }, + "model": { + "type": "string" + }, + "sample_rate": { + "type": "int64" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ], + "pcm_frame_out": [ + { + "name": "pcm_frame" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/cosy_tts/property.json b/agents/addon/extension/cosy_tts/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/addon/extension/cosy_tts/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/cosy_tts/requirements.txt b/agents/addon/extension/cosy_tts/requirements.txt new file mode 100644 index 00000000..f1c09c9e --- /dev/null +++ b/agents/addon/extension/cosy_tts/requirements.txt @@ -0,0 +1 @@ +dashscope==1.20.0 \ No newline at end of file diff --git a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go index 3b6ef4fd..4ca3b9de 100644 --- a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go +++ b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go @@ -18,7 +18,7 @@ import ( "sync/atomic" "time" - "agora.io/rte/rtego" + "agora.io/rte/rte" ) const ( @@ -50,7 +50,7 @@ var ( ) type elevenlabsTTSExtension struct { - rtego.DefaultExtension + rte.DefaultExtension elevenlabsTTS *elevenlabsTTS } @@ -59,7 +59,7 @@ type message struct { receivedTs int64 } -func newElevenlabsTTSExtension(name string) rtego.Extension { +func newElevenlabsTTSExtension(name string) rte.Extension { return &elevenlabsTTSExtension{} } @@ 
-75,7 +75,7 @@ func newElevenlabsTTSExtension(name string) rtego.Extension { // - stability // - style // - voice_id -func (e *elevenlabsTTSExtension) OnStart(rte rtego.Rte) { +func (e *elevenlabsTTSExtension) OnStart(rte rte.RteEnv) { slog.Info("OnStart", logTag) // prepare configuration @@ -266,13 +266,14 @@ func (e *elevenlabsTTSExtension) OnStart(rte rtego.Rte) { // example: // {"name": "flush"} func (e *elevenlabsTTSExtension) OnCmd( - rte rtego.Rte, - cmd rtego.Cmd, + rteEnv rte.RteEnv, + cmd rte.Cmd, ) { - cmdName, err := cmd.CmdName() + cmdName, err := cmd.GetName() if err != nil { slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } @@ -283,23 +284,26 @@ func (e *elevenlabsTTSExtension) OnCmd( outdateTs.Store(time.Now().UnixMicro()) // send out - outCmd, err := rtego.NewCmd(cmdOutFlush) + outCmd, err := rte.NewCmd(cmdOutFlush) if err != nil { slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } - if err := rte.SendCmd(outCmd, nil); err != nil { + if err := rteEnv.SendCmd(outCmd, nil); err != nil { slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } else { slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) } } - rte.ReturnString(rtego.Ok, "ok", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Ok) + rteEnv.ReturnResult(cmdResult, cmd) } // OnData receives data from rte graph. 
@@ -308,8 +312,8 @@ func (e *elevenlabsTTSExtension) OnCmd( // example: // {name: text_data, properties: {text: "hello"} func (e *elevenlabsTTSExtension) OnData( - rte rtego.Rte, - data rtego.Data, + rteEnv rte.RteEnv, + data rte.Data, ) { text, err := data.GetPropertyString(dataInTextDataPropertyText) if err != nil { @@ -333,8 +337,8 @@ func init() { slog.Info("elevenlabs_tts extension init", logTag) // Register addon - rtego.RegisterAddonAsExtension( + rte.RegisterAddonAsExtension( "elevenlabs_tts", - rtego.NewDefaultExtensionAddon(newElevenlabsTTSExtension), + rte.NewDefaultExtensionAddon(newElevenlabsTTSExtension), ) } diff --git a/agents/addon/extension/elevenlabs_tts/manifest.json b/agents/addon/extension/elevenlabs_tts/manifest.json index 620fb224..063c2e84 100644 --- a/agents/addon/extension/elevenlabs_tts/manifest.json +++ b/agents/addon/extension/elevenlabs_tts/manifest.json @@ -4,15 +4,10 @@ "version": "0.1.0", "language": "go", "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, { "type": "system", "name": "rte_runtime_go", - "version": "0.1.0" + "version": "0.3.1" } ], "api": { @@ -71,4 +66,4 @@ } ] } -} +} \ No newline at end of file diff --git a/agents/addon/extension/elevenlabs_tts/pcm.go b/agents/addon/extension/elevenlabs_tts/pcm.go index c3454b10..c135d9d4 100644 --- a/agents/addon/extension/elevenlabs_tts/pcm.go +++ b/agents/addon/extension/elevenlabs_tts/pcm.go @@ -14,7 +14,7 @@ import ( "fmt" "log/slog" - "agora.io/rte/rtego" + "agora.io/rte/rte" ) type pcm struct { @@ -49,8 +49,8 @@ func newPcm(config *pcmConfig) *pcm { } } -func (p *pcm) getPcmFrame(buf []byte) (pcmFrame rtego.PcmFrame, err error) { - pcmFrame, err = rtego.NewPcmFrame(p.config.Name) +func (p *pcm) getPcmFrame(buf []byte) (pcmFrame rte.PcmFrame, err error) { + pcmFrame, err = rte.NewPcmFrame(p.config.Name) if err != nil { slog.Error(fmt.Sprintf("NewPcmFrame failed, err: %v", err), logTag) return @@ -62,20 +62,20 @@ func (p *pcm) getPcmFrame(buf []byte) (pcmFrame rtego.PcmFrame, err error) { pcmFrame.SetChannelLayout(p.config.ChannelLayout) pcmFrame.SetNumberOfChannels(p.config.Channel) pcmFrame.SetTimestamp(p.config.Timestamp) - pcmFrame.SetDataFmt(rtego.PcmFrameDataFmtInterleave) + pcmFrame.SetDataFmt(rte.PcmFrameDataFmtInterleave) pcmFrame.SetSamplesPerChannel(p.config.SamplesPerChannel) pcmFrame.AllocBuf(p.getPcmFrameSize()) - borrowedBuf, err := pcmFrame.BorrowBuf() + borrowedBuf, err := pcmFrame.LockBuf() if err != nil { - slog.Error(fmt.Sprintf("BorrowBuf failed, err: %v", err), logTag) + slog.Error(fmt.Sprintf("LockBuf failed, err: %v", err), logTag) return } // copy data copy(borrowedBuf, buf) - pcmFrame.GiveBackBuf(&borrowedBuf) + pcmFrame.UnlockBuf(&borrowedBuf) return } @@ -87,7 +87,7 @@ func (p *pcm) newBuf() []byte { return make([]byte, p.getPcmFrameSize()) } -func (p *pcm) send(rte rtego.Rte, buf []byte) (err error) { +func (p *pcm) send(rteEnv rte.RteEnv, buf []byte) (err error) { pcmFrame, err := p.getPcmFrame(buf) if err != nil { slog.Error(fmt.Sprintf("getPcmFrame failed, err: %v", err), logTag) @@ -95,7 +95,7 @@ func (p *pcm) send(rte rtego.Rte, buf []byte) (err error) { } // send pcm - if err = rte.SendPcmFrame(pcmFrame); err != nil { + if err = rteEnv.SendPcmFrame(pcmFrame); err != nil { slog.Error(fmt.Sprintf("SendPcmFrame failed, err: %v", err), logTag) return } diff --git a/agents/addon/extension/elevenlabs_tts_python/__init__.py b/agents/addon/extension/elevenlabs_tts_python/__init__.py new file mode 100644 index 00000000..8cf7e25f --- 
/dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/__init__.py @@ -0,0 +1,5 @@ +from . import elevenlabs_tts_addon +from .log import logger + + +logger.info("elevenlabs_tts_python extension loaded") diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py new file mode 100644 index 00000000..6fe1b72c --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py @@ -0,0 +1,67 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# + +from typing import Iterator +from elevenlabs import Voice, VoiceSettings +from elevenlabs.client import ElevenLabs + + +class ElevenlabsTTSConfig: + def __init__( + self, + api_key="", + model_id="eleven_multilingual_v2", + optimize_streaming_latency=0, + request_timeout_seconds=30, + similarity_boost=0.75, + speaker_boost=False, + stability=0.5, + style=0.0, + voice_id="pNInz6obpgDQGcFmaJgB", + ) -> None: + self.api_key = api_key + self.model_id = model_id + self.optimize_streaming_latency = optimize_streaming_latency + self.request_timeout_seconds = request_timeout_seconds + self.similarity_boost = similarity_boost + self.speaker_boost = speaker_boost + self.stability = stability + self.style = style + self.voice_id = voice_id + + +def default_elevenlabs_tts_config() -> ElevenlabsTTSConfig: + return ElevenlabsTTSConfig() + + +class ElevenlabsTTS: + def __init__(self, config: ElevenlabsTTSConfig) -> None: + self.config = config + self.client = ElevenLabs( + api_key=config.api_key, timeout=config.request_timeout_seconds + ) + + def text_to_speech_stream(self, text: str) -> Iterator[bytes]: + audio_stream = self.client.generate( + text=text, + model=self.config.model_id, + optimize_streaming_latency=self.config.optimize_streaming_latency, + output_format="pcm_16000", + stream=True, + voice=Voice( + voice_id=self.config.voice_id, + settings=VoiceSettings( + stability=self.config.stability, + similarity_boost=self.config.similarity_boost, + style=self.config.style, + speaker_boost=self.config.speaker_boost, + ), + ), + ) + + return audio_stream diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py new file mode 100644 index 00000000..90d2bd4a --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py @@ -0,0 +1,33 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. 
+# +# + +from rte import ( + Addon, + register_addon_as_extension, + RteEnv, +) +from .log import logger + + +@register_addon_as_extension("elevenlabs_tts_python") +class ElevenlabsTTSExtensionAddon(Addon): + def on_init(self, rte: RteEnv, manifest, property) -> None: + logger.info("on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: + logger.info("on_create_instance") + from .elevenlabs_tts_extension import ElevenlabsTTSExtension + + rte.on_create_instance_done(ElevenlabsTTSExtension(addon_name), context) + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py new file mode 100644 index 00000000..803ed5c2 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py @@ -0,0 +1,298 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# + +import queue +import threading +import time + +from rte import ( + Addon, + Extension, + register_addon_as_extension, + RteEnv, + Cmd, + CmdResult, + StatusCode, + Data, + MetadataInfo, +) +from .elevenlabs_tts import default_elevenlabs_tts_config, ElevenlabsTTS +from .pcm import PcmConfig, Pcm +from .log import logger + +CMD_IN_FLUSH = "flush" +CMD_OUT_FLUSH = "flush" + +DATA_IN_TEXT_DATA_PROPERTY_TEXT = "text" + +PROPERTY_API_KEY = "api_key" # Required +PROPERTY_MODEL_ID = "model_id" # Optional +PROPERTY_OPTIMIZE_STREAMING_LATENCY = "optimize_streaming_latency" # Optional +PROPERTY_REQUEST_TIMEOUT_SECONDS = "request_timeout_seconds" # Optional +PROPERTY_SIMILARITY_BOOST = "similarity_boost" # Optional +PROPERTY_SPEAKER_BOOST = "speaker_boost" # Optional +PROPERTY_STABILITY = "stability" # Optional +PROPERTY_STYLE = "style" # Optional + + +class Message: + def __init__(self, text: str, received_ts: int) -> None: + self.text = text + self.received_ts = received_ts + + +class ElevenlabsTTSExtension(Extension): + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: + logger.info("on_init") + + self.elevenlabs_tts = None + self.outdate_ts = 0 + self.pcm = None + self.pcm_frame_size = 0 + self.text_queue = queue.Queue(maxsize=1024) + + rte.on_init_done(manifest, property) + + def on_start(self, rte: RteEnv) -> None: + logger.info("on_start") + + # prepare configuration + elevenlabs_tts_config = default_elevenlabs_tts_config() + + try: + elevenlabs_tts_config.api_key = rte.get_property_string(PROPERTY_API_KEY) + except Exception as e: + logger.warning( + f"on_start get_property_string {PROPERTY_API_KEY} error: {e}" + ) + return + + try: + model_id = rte.get_property_string(PROPERTY_MODEL_ID) + if len(model_id) > 0: + elevenlabs_tts_config.model_id = model_id + except Exception as e: + logger.warning( + f"on_start get_property_string {PROPERTY_MODEL_ID} error: {e}" + ) + + try: + optimize_streaming_latency = rte.get_property_int( + PROPERTY_OPTIMIZE_STREAMING_LATENCY + ) + if optimize_streaming_latency > 0: + elevenlabs_tts_config.optimize_streaming_latency = ( + optimize_streaming_latency + ) + except Exception as e: + logger.warning( + f"on_start get_property_int {PROPERTY_OPTIMIZE_STREAMING_LATENCY} error: {e}" + ) + + try: + request_timeout_seconds = rte.get_property_int( + PROPERTY_REQUEST_TIMEOUT_SECONDS + ) + if 
request_timeout_seconds > 0:
+                elevenlabs_tts_config.request_timeout_seconds = request_timeout_seconds
+        except Exception as e:
+            logger.warning(
+                f"on_start get_property_int {PROPERTY_REQUEST_TIMEOUT_SECONDS} error: {e}"
+            )
+
+        try:
+            elevenlabs_tts_config.similarity_boost = rte.get_property_float(
+                PROPERTY_SIMILARITY_BOOST
+            )
+        except Exception as e:
+            logger.warning(
+                f"on_start get_property_float {PROPERTY_SIMILARITY_BOOST} error: {e}"
+            )
+
+        try:
+            elevenlabs_tts_config.speaker_boost = rte.get_property_bool(
+                PROPERTY_SPEAKER_BOOST
+            )
+        except Exception as e:
+            logger.warning(
+                f"on_start get_property_bool {PROPERTY_SPEAKER_BOOST} error: {e}"
+            )
+
+        try:
+            elevenlabs_tts_config.stability = rte.get_property_float(PROPERTY_STABILITY)
+        except Exception as e:
+            logger.warning(
+                f"on_start get_property_float {PROPERTY_STABILITY} error: {e}"
+            )
+
+        try:
+            elevenlabs_tts_config.style = rte.get_property_float(PROPERTY_STYLE)
+        except Exception as e:
+            logger.warning(f"on_start get_property_float {PROPERTY_STYLE} error: {e}")
+
+        # create elevenlabsTTS instance
+        self.elevenlabs_tts = ElevenlabsTTS(elevenlabs_tts_config)
+
+        logger.info(
+            f"ElevenlabsTTS succeed with model_id: {self.elevenlabs_tts.config.model_id}, VoiceId: {self.elevenlabs_tts.config.voice_id}"
+        )
+
+        # create pcm instance
+        self.pcm = Pcm(PcmConfig())
+        self.pcm_frame_size = self.pcm.get_pcm_frame_size()
+
+        threading.Thread(target=self.process_text_queue, args=(rte,)).start()
+
+        rte.on_start_done()
+
+    def on_stop(self, rte: RteEnv) -> None:
+        logger.info("on_stop")
+        rte.on_stop_done()
+
+    def on_deinit(self, rte: RteEnv) -> None:
+        logger.info("on_deinit")
+        rte.on_deinit_done()
+
+    def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None:
+        """
+        on_cmd receives cmd from rte graph.
+        current supported cmd:
+          - name: flush
+            example:
+            {"name": "flush"}
+        """
+        logger.info("on_cmd")
+        cmd_name = cmd.get_name()
+
+        logger.info(f"on_cmd [{cmd_name}]")
+
+        if cmd_name == CMD_IN_FLUSH:
+            self.outdate_ts = int(time.time() * 1000000)
+
+            # send out
+            out_cmd = Cmd.create(CMD_OUT_FLUSH)
+            rte.send_cmd(out_cmd, None)
+
+        cmd_result = CmdResult.create(StatusCode.OK)
+        cmd_result.set_property_string("detail", "success")
+        rte.return_result(cmd_result, cmd)
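+
+    # Interrupt bookkeeping (illustrative): outdate_ts is a microsecond Unix
+    # timestamp; a flush received at time t makes process_text_queue drop any
+    # queued Message whose received_ts < t, so stale sentences are skipped
+    # without tearing down the worker thread.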
+
+    def on_data(self, rte: RteEnv, data: Data) -> None:
+        """
+        on_data receives data from rte graph.
+        current supported data:
+          - name: text_data
+            example:
+            {name: text_data, properties: {text: "hello"}}
+        """
+        logger.info("on_data")
+
+        try:
+            text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT)
+        except Exception as e:
+            logger.warning(
+                f"on_data get_property_string {DATA_IN_TEXT_DATA_PROPERTY_TEXT} error: {e}"
+            )
+            return
+
+        if len(text) == 0:
+            logger.debug("on_data text is empty, ignored")
+            return
+
+        logger.info(f"OnData input text: [{text}]")
+
+        self.text_queue.put(Message(text, int(time.time() * 1000000)))
+
+    def process_text_queue(self, rte: RteEnv):
+        logger.info("process_text_queue")
+
+        while True:
+            msg = self.text_queue.get()
+            logger.debug(f"process_text_queue, text: [{msg.text}]")
+
+            if msg.received_ts < self.outdate_ts:
+                logger.info(
+                    f"textChan interrupt and flushing for input text: [{msg.text}], received_ts: {msg.received_ts}, outdate_ts: {self.outdate_ts}"
+                )
+                continue
+
+            start_time = time.time()
+            buf = self.pcm.new_buf()
+            first_frame_latency = 0
+            n = 0
+            pcm_frame_read = 0
+            read_bytes = 0
+            sent_frames = 0
+
+            audio_stream = self.elevenlabs_tts.text_to_speech_stream(msg.text)
+
+            for chunk in self.pcm.read_pcm_stream(audio_stream, self.pcm_frame_size):
+                if msg.received_ts < self.outdate_ts:
+                    logger.info(
+                        f"textChan interrupt and flushing for input text: [{msg.text}], received_ts: {msg.received_ts}, outdate_ts: {self.outdate_ts}"
+                    )
+                    break
+
+                if not chunk:
+                    logger.info("read pcm stream EOF")
+                    break
+
+                n = len(chunk)
+                read_bytes += n
+                # copy the chunk into the pending frame buffer before counting
+                # it, so the sent frame actually carries the synthesized audio
+                buf[pcm_frame_read : pcm_frame_read + n] = chunk
+                pcm_frame_read += n
+
+                if pcm_frame_read != self.pcm.get_pcm_frame_size():
+                    logger.debug(
+                        f"the number of bytes read is [{pcm_frame_read}] inconsistent with pcm frame size",
+                    )
+                    continue
+
+                self.pcm.send(rte, buf)
+                buf = self.pcm.new_buf()
+                pcm_frame_read = 0
+                sent_frames += 1
+
+                if first_frame_latency == 0:
+                    first_frame_latency = int((time.time() - start_time) * 1000)
+                    logger.info(
+                        f"first frame available for text: [{msg.text}], received_ts: {msg.received_ts}, first_frame_latency: {first_frame_latency}ms",
+                    )
+
+                logger.debug(f"sending pcm data, text: [{msg.text}]")
+
+            if pcm_frame_read > 0:
+                self.pcm.send(rte, buf)
+                sent_frames += 1
+                logger.info(
+                    f"sending pcm remain data, text: [{msg.text}], pcm_frame_read: {pcm_frame_read}"
+                )
+
+            finish_latency = int((time.time() - start_time) * 1000)
+            logger.info(
+                f"send pcm data finished, text: [{msg.text}], received_ts: {msg.received_ts}, read_bytes: {read_bytes}, sent_frames: {sent_frames}, \
+                first_frame_latency: {first_frame_latency}ms, finish_latency: {finish_latency}ms"
+            )
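+
+# Frame sizing, worked through (illustrative): with the default PcmConfig in
+# pcm.py (16 kHz mono, 2 bytes per sample, 16000 // 100 = 160 samples per
+# channel, i.e. 10 ms frames), get_pcm_frame_size() is 160 * 1 * 2 = 320
+# bytes, so one second of synthesized audio is sent as roughly 100 frames.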
+
+
+@register_addon_as_extension("elevenlabs_tts_python")
+class ElevenlabsTTSExtensionAddon(Addon):
+    def on_init(self, rte: RteEnv, manifest, property) -> None:
+        logger.info("on_init")
+        rte.on_init_done(manifest, property)
+        return
+
+    def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None:
+        logger.info("on_create_instance")
+        rte.on_create_instance_done(ElevenlabsTTSExtension(addon_name), context)
+
+    def on_deinit(self, rte: RteEnv) -> None:
+        logger.info("on_deinit")
+        rte.on_deinit_done()
+        return
diff --git a/agents/addon/extension/elevenlabs_tts_python/log.py b/agents/addon/extension/elevenlabs_tts_python/log.py
new file mode 100644
index 00000000..54f870f3
--- /dev/null
+++ b/agents/addon/extension/elevenlabs_tts_python/log.py
@@ -0,0 +1,13 @@
+import logging
+
+logger = logging.getLogger("elevenlabs_tts_python")
+logger.setLevel(logging.INFO)
+
+formatter = logging.Formatter(
+    "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s"
+)
+
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(formatter)
+
+logger.addHandler(console_handler)
diff --git a/agents/addon/extension/elevenlabs_tts_python/manifest.json b/agents/addon/extension/elevenlabs_tts_python/manifest.json
new file mode 100644
index 00000000..7cf5c061
--- /dev/null
+++ b/agents/addon/extension/elevenlabs_tts_python/manifest.json
@@ -0,0 +1,69 @@
+{
+  "type": "extension",
+  "name": "elevenlabs_tts_python",
+  "version": "0.1.0",
+  "language": "python",
+  "dependencies": [
+    {
+      "type": "system",
+      "name": "rte_runtime_python",
+      "version": "0.3.1"
+    }
+  ],
+  "api": {
+    "property": {
+      "api_key": {
+        "type": "string"
+      },
+      "model_id": {
+        "type": "string"
+      },
+      "request_timeout_seconds": {
+        "type": "int64"
+      },
+      "similarity_boost": {
+        "type": "float64"
+      },
+      "speaker_boost": {
+        "type": "bool"
+      },
+      "stability": {
+        "type": "float64"
+      },
+      "style": {
+        "type": "float64"
+      },
+      "optimize_streaming_latency": {
+        "type": "int64"
+      },
+      "voice_id": {
+        "type": "string"
+      }
+    },
+    "data_in": [
+      {
+        "name": "text_data",
+        "property": {
+          "text": {
+            "type": "string"
+          }
+        }
+      }
+    ],
+    "cmd_in": [
+      {
+        "name": "flush"
+      }
+    ],
+    "cmd_out": [
+      {
+        "name": "flush"
+      }
+    ],
+    "pcm_frame_out": [
+      {
+        "name": "pcm_frame"
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/agents/addon/extension/elevenlabs_tts_python/pcm.py b/agents/addon/extension/elevenlabs_tts_python/pcm.py
new file mode 100644
index 00000000..8f73c87b
--- /dev/null
+++ b/agents/addon/extension/elevenlabs_tts_python/pcm.py
@@ -0,0 +1,74 @@
+#
+#
+# Agora Real Time Engagement
+# Created by XinHui Li in 2024-07.
+# Copyright (c) 2024 Agora IO. All rights reserved.
+#
+#
+
+import logging
+from typing import Iterator
+from rte import PcmFrame, RteEnv, RTE_PCM_FRAME_DATA_FMT
+
+
+class Pcm:
+    def __init__(self, config) -> None:
+        self.config = config
+
+    def get_pcm_frame(self, buf: bytearray) -> PcmFrame:
+        frame = PcmFrame.create(self.config.name)
+        frame.set_bytes_per_sample(self.config.bytes_per_sample)
+        frame.set_sample_rate(self.config.sample_rate)
+        frame.set_number_of_channels(self.config.channel)
+        frame.set_timestamp(self.config.timestamp)
+        frame.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_NON_INTERLEAVE)
+        frame.set_samples_per_channel(self.config.samples_per_channel)
+
+        frame.alloc_buf(self.get_pcm_frame_size())
+        frame_buf = frame.lock_buf()
+        # copy data
+        frame_buf[:] = buf
+        frame.unlock_buf(frame_buf)
+
+        return frame
+
+    def get_pcm_frame_size(self) -> int:
+        return (
+            self.config.samples_per_channel
+            * self.config.channel
+            * self.config.bytes_per_sample
+        )
+
+    def new_buf(self) -> bytearray:
+        return bytearray(self.get_pcm_frame_size())
+
+    def read_pcm_stream(
+        self, stream: Iterator[bytes], chunk_size: int
+    ) -> Iterator[bytes]:
+        chunk = b""
+        for data in stream:
+            chunk += data
+            while len(chunk) >= chunk_size:
+                yield chunk[:chunk_size]
+                chunk = chunk[chunk_size:]
+        if chunk:
+            yield chunk
+
+    def send(self, rte: RteEnv, buf: bytearray) -> None:
+        try:
+            frame = self.get_pcm_frame(buf)
+            rte.send_pcm_frame(frame)
+        except Exception as e:
+            logging.error(f"send frame failed, {e}")
+
+
+class PcmConfig:
+    def __init__(self) -> None:
+        self.bytes_per_sample = 2
+        self.channel = 1
+        self.name = "pcm_frame"
+        self.sample_rate = 16000
+        self.samples_per_channel = 16000 // 100
+        self.timestamp = 0
diff --git 
a/agents/addon/extension/elevenlabs_tts_python/property.json b/agents/addon/extension/elevenlabs_tts_python/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/elevenlabs_tts_python/requirements.txt b/agents/addon/extension/elevenlabs_tts_python/requirements.txt new file mode 100644 index 00000000..3dff4c02 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/requirements.txt @@ -0,0 +1 @@ +elevenlabs==1.4.1 diff --git a/agents/addon/extension/interrupt_detector/extension.go b/agents/addon/extension/interrupt_detector/extension.go index 962940e0..8fdc201b 100644 --- a/agents/addon/extension/interrupt_detector/extension.go +++ b/agents/addon/extension/interrupt_detector/extension.go @@ -14,7 +14,7 @@ import ( "fmt" "log/slog" - "agora.io/rte/rtego" + "agora.io/rte/rte" ) const ( @@ -29,10 +29,10 @@ var ( ) type interruptDetectorExtension struct { - rtego.DefaultExtension + rte.DefaultExtension } -func newExtension(name string) rtego.Extension { +func newExtension(name string) rte.Extension { return &interruptDetectorExtension{} } @@ -42,8 +42,8 @@ func newExtension(name string) rtego.Extension { // example: // {name: text_data, properties: {text: "hello", is_final: false} func (p *interruptDetectorExtension) OnData( - rte rtego.Rte, - data rtego.Data, + rteEnv rte.RteEnv, + data rte.Data, ) { text, err := data.GetPropertyString(textDataTextField) if err != nil { @@ -60,8 +60,8 @@ func (p *interruptDetectorExtension) OnData( slog.Debug(fmt.Sprintf("OnData %s: %s %s: %t", textDataTextField, text, textDataFinalField, final), logTag) if final || len(text) >= 2 { - flushCmd, _ := rtego.NewCmd(cmdNameFlush) - rte.SendCmd(flushCmd, nil) + flushCmd, _ := rte.NewCmd(cmdNameFlush) + rteEnv.SendCmd(flushCmd, nil) slog.Info(fmt.Sprintf("sent cmd: %s", cmdNameFlush), logTag) } @@ -71,8 +71,8 @@ func init() { slog.Info("interrupt_detector extension init", logTag) // Register addon - rtego.RegisterAddonAsExtension( + rte.RegisterAddonAsExtension( "interrupt_detector", - rtego.NewDefaultExtensionAddon(newExtension), + rte.NewDefaultExtensionAddon(newExtension), ) } diff --git a/agents/addon/extension/interrupt_detector/manifest.json b/agents/addon/extension/interrupt_detector/manifest.json index 05781cf5..2a29ba1f 100644 --- a/agents/addon/extension/interrupt_detector/manifest.json +++ b/agents/addon/extension/interrupt_detector/manifest.json @@ -4,15 +4,10 @@ "version": "0.1.0", "language": "go", "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, { "type": "system", "name": "rte_runtime_go", - "version": "0.1.0" + "version": "0.3.1" } ], "api": { diff --git a/agents/addon/extension/interrupt_detector_python/__init__.py b/agents/addon/extension/interrupt_detector_python/__init__.py new file mode 100644 index 00000000..8692cc02 --- /dev/null +++ b/agents/addon/extension/interrupt_detector_python/__init__.py @@ -0,0 +1,4 @@ +from . 
import interrupt_detector_addon
+from .log import logger
+
+logger.info("interrupt_detector_python extension loaded")
diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py
new file mode 100644
index 00000000..407d7816
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py
@@ -0,0 +1,34 @@
+#
+#
+# Agora Real Time Engagement
+# Created by XinHui Li in 2024-07.
+# Copyright (c) 2024 Agora IO. All rights reserved.
+#
+#
+
+from rte import (
+    Addon,
+    register_addon_as_extension,
+    RteEnv,
+)
+from .log import logger
+
+
+@register_addon_as_extension("interrupt_detector_python")
+class InterruptDetectorExtensionAddon(Addon):
+    def on_init(self, rte: RteEnv, manifest, property) -> None:
+        logger.info("on_init")
+        rte.on_init_done(manifest, property)
+        return
+
+    def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None:
+        logger.info("on_create_instance")
+
+        from .interrupt_detector_extension import InterruptDetectorExtension
+
+        rte.on_create_instance_done(InterruptDetectorExtension(addon_name), context)
+
+    def on_deinit(self, rte: RteEnv) -> None:
+        logger.info("on_deinit")
+        rte.on_deinit_done()
+        return
diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py
new file mode 100644
index 00000000..510b4e1e
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py
@@ -0,0 +1,99 @@
+#
+#
+# Agora Real Time Engagement
+# Created by XinHui Li in 2024-07.
+# Copyright (c) 2024 Agora IO. All rights reserved.
+#
+#
+
+from rte import (
+    Extension,
+    RteEnv,
+    Cmd,
+    Data,
+    StatusCode,
+    CmdResult,
+    MetadataInfo,
+)
+from .log import logger
+
+
+CMD_NAME_FLUSH = "flush"
+
+TEXT_DATA_TEXT_FIELD = "text"
+TEXT_DATA_FINAL_FIELD = "is_final"
+
+
+class InterruptDetectorExtension(Extension):
+    def on_init(
+        self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo
+    ) -> None:
+        logger.info("on_init")
+        rte.on_init_done(manifest, property)
+
+    def on_start(self, rte: RteEnv) -> None:
+        logger.info("on_start")
+        rte.on_start_done()
+
+    def on_stop(self, rte: RteEnv) -> None:
+        logger.info("on_stop")
+        rte.on_stop_done()
+
+    def on_deinit(self, rte: RteEnv) -> None:
+        logger.info("on_deinit")
+        rte.on_deinit_done()
+
+    def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None:
+        logger.info("on_cmd")
+        cmd_json = cmd.to_json()
+        logger.info("on_cmd json: " + cmd_json)
+
+        cmd_result = CmdResult.create(StatusCode.OK)
+        cmd_result.set_property_string("detail", "success")
+        rte.return_result(cmd_result, cmd)
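+
+    # Interrupt heuristic (illustrative): a final transcript always triggers a
+    # flush, and a partial transcript triggers one once it reaches two
+    # characters -- e.g. "he" from a user starting to talk already cancels
+    # in-flight TTS, while a single-character fragment is ignored as noise.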
+
+    def on_data(self, rte: RteEnv, data: Data) -> None:
+        """
+        on_data receives data from rte graph.
+        current supported data:
+          - name: text_data
+            example:
+            {name: text_data, properties: {text: "hello", is_final: false}}
+        """
+        logger.info("on_data")
+
+        try:
+            text = data.get_property_string(TEXT_DATA_TEXT_FIELD)
+        except Exception as e:
+            logger.warning(
+                f"on_data get_property_string {TEXT_DATA_TEXT_FIELD} error: {e}"
+            )
+            return
+
+        try:
+            final = data.get_property_bool(TEXT_DATA_FINAL_FIELD)
+        except Exception as e:
+            logger.warning(
+                f"on_data get_property_bool {TEXT_DATA_FINAL_FIELD} error: {e}"
+            )
+            return
+
+        logger.debug(
+            f"on_data {TEXT_DATA_TEXT_FIELD}: {text} {TEXT_DATA_FINAL_FIELD}: {final}"
+        )
+
+        if final or len(text) >= 2:
+            flush_cmd = Cmd.create(CMD_NAME_FLUSH)
+            rte.send_cmd(
+                flush_cmd,
+                lambda rte, result: print(
+                    "InterruptDetectorExtensionAddon send_cmd done"
+                ),
+            )
+
+            logger.info(f"sent cmd: {CMD_NAME_FLUSH}")
+
+        d = Data.create("text_data")
+        d.set_property_bool(TEXT_DATA_FINAL_FIELD, final)
+        d.set_property_string(TEXT_DATA_TEXT_FIELD, text)
+        rte.send_data(d)
diff --git a/agents/addon/extension/interrupt_detector_python/log.py b/agents/addon/extension/interrupt_detector_python/log.py
new file mode 100644
index 00000000..303d06e1
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/log.py
@@ -0,0 +1,13 @@
+import logging
+
+logger = logging.getLogger("interrupt_detector_python")
+logger.setLevel(logging.INFO)
+
+formatter = logging.Formatter(
+    "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s"
+)
+
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(formatter)
+
+logger.addHandler(console_handler)
diff --git a/agents/addon/extension/interrupt_detector_python/manifest.json b/agents/addon/extension/interrupt_detector_python/manifest.json
new file mode 100644
index 00000000..692aea02
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/manifest.json
@@ -0,0 +1,46 @@
+{
+  "type": "extension",
+  "name": "interrupt_detector_python",
+  "version": "0.1.0",
+  "language": "python",
+  "dependencies": [
+    {
+      "type": "system",
+      "name": "rte_runtime_python",
+      "version": "0.3.1"
+    }
+  ],
+  "api": {
+    "data_in": [
+      {
+        "name": "text_data",
+        "property": {
+          "text": {
+            "type": "string"
+          },
+          "is_final": {
+            "type": "bool"
+          }
+        }
+      }
+    ],
+    "cmd_out": [
+      {
+        "name": "flush"
+      }
+    ],
+    "data_out": [
+      {
+        "name": "text_data",
+        "property": {
+          "text": {
+            "type": "string"
+          },
+          "is_final": {
+            "type": "bool"
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/agents/addon/extension/interrupt_detector_python/property.json b/agents/addon/extension/interrupt_detector_python/property.json
new file mode 100644
index 00000000..9e26dfee
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/property.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/agents/addon/extension/openai_chatgpt/manifest.json b/agents/addon/extension/openai_chatgpt/manifest.json
index aa4f6d50..a2fc4f61 100644
--- a/agents/addon/extension/openai_chatgpt/manifest.json
+++ b/agents/addon/extension/openai_chatgpt/manifest.json
@@ -5,15 +5,10 @@
   "language": "go",
   "support": [],
   "dependencies": [
-    {
-      "type": "system",
-      "name": "rte_runtime",
-      "version": "0.1.0"
-    },
     {
       "type": "system",
       "name": "rte_runtime_go",
-      "version": "0.1.0"
+      "version": "0.3.1"
     }
   ],
   "api": {
diff --git a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go
index 6abdaa51..a4492f67 100644
--- 
a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go +++ b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go @@ -19,7 +19,7 @@ import ( "sync/atomic" "time" - "agora.io/rte/rtego" + "agora.io/rte/rte" openai "github.com/sashabaranov/go-openai" ) @@ -28,7 +28,7 @@ var ( ) type openaiChatGPTExtension struct { - rtego.DefaultExtension + rte.DefaultExtension openaiChatGPT *openaiChatGPT } @@ -63,7 +63,7 @@ var ( wg sync.WaitGroup ) -func newChatGPTExtension(name string) rtego.Extension { +func newChatGPTExtension(name string) rte.Extension { return &openaiChatGPTExtension{} } @@ -80,13 +80,13 @@ func newChatGPTExtension(name string) rtego.Extension { // - max_tokens // - greeting // - proxy_url -func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { +func (p *openaiChatGPTExtension) OnStart(rteEnv rte.RteEnv) { slog.Info("OnStart", logTag) // prepare configuration openaiChatGPTConfig := defaultOpenaiChatGPTConfig() - if baseUrl, err := rte.GetPropertyString(propertyBaseUrl); err != nil { + if baseUrl, err := rteEnv.GetPropertyString(propertyBaseUrl); err != nil { slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyBaseUrl, err), logTag) } else { if len(baseUrl) > 0 { @@ -94,14 +94,14 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { } } - if apiKey, err := rte.GetPropertyString(propertyApiKey); err != nil { + if apiKey, err := rteEnv.GetPropertyString(propertyApiKey); err != nil { slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) return } else { openaiChatGPTConfig.ApiKey = apiKey } - if model, err := rte.GetPropertyString(propertyModel); err != nil { + if model, err := rteEnv.GetPropertyString(propertyModel); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyModel, err), logTag) } else { if len(model) > 0 { @@ -109,7 +109,7 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { } } - if prompt, err := rte.GetPropertyString(propertyPrompt); err != nil { + if prompt, err := rteEnv.GetPropertyString(propertyPrompt); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyPrompt, err), logTag) } else { if len(prompt) > 0 { @@ -117,31 +117,31 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { } } - if frequencyPenalty, err := rte.GetPropertyFloat64(propertyFrequencyPenalty); err != nil { + if frequencyPenalty, err := rteEnv.GetPropertyFloat64(propertyFrequencyPenalty); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyFrequencyPenalty, err), logTag) } else { openaiChatGPTConfig.FrequencyPenalty = float32(frequencyPenalty) } - if presencePenalty, err := rte.GetPropertyFloat64(propertyPresencePenalty); err != nil { + if presencePenalty, err := rteEnv.GetPropertyFloat64(propertyPresencePenalty); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyPresencePenalty, err), logTag) } else { openaiChatGPTConfig.PresencePenalty = float32(presencePenalty) } - if temperature, err := rte.GetPropertyFloat64(propertyTemperature); err != nil { + if temperature, err := rteEnv.GetPropertyFloat64(propertyTemperature); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTemperature, err), logTag) } else { openaiChatGPTConfig.Temperature = float32(temperature) } - if topP, err := rte.GetPropertyFloat64(propertyTopP); err != nil { + if topP, err := rteEnv.GetPropertyFloat64(propertyTopP); err != nil { slog.Warn(fmt.Sprintf("GetProperty 
optional %s failed, err: %v", propertyTopP, err), logTag) } else { openaiChatGPTConfig.TopP = float32(topP) } - if maxTokens, err := rte.GetPropertyInt64(propertyMaxTokens); err != nil { + if maxTokens, err := rteEnv.GetPropertyInt64(propertyMaxTokens); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxTokens, err), logTag) } else { if maxTokens > 0 { @@ -149,18 +149,18 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { } } - if proxyUrl, err := rte.GetPropertyString(propertyProxyUrl); err != nil { + if proxyUrl, err := rteEnv.GetPropertyString(propertyProxyUrl); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyProxyUrl, err), logTag) } else { openaiChatGPTConfig.ProxyUrl = proxyUrl } - greeting, err := rte.GetPropertyString(propertyGreeting) + greeting, err := rteEnv.GetPropertyString(propertyGreeting) if err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyGreeting, err), logTag) } - if propMaxMemoryLength, err := rte.GetPropertyInt64(propertyMaxMemoryLength); err != nil { + if propMaxMemoryLength, err := rteEnv.GetPropertyInt64(propertyMaxMemoryLength); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxMemoryLength, err), logTag) } else { if propMaxMemoryLength > 0 { @@ -183,17 +183,17 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { // send greeting if available if len(greeting) > 0 { - outputData, _ := rtego.NewData("text_data") + outputData, _ := rte.NewData("text_data") outputData.SetProperty(dataOutTextDataPropertyText, greeting) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) - if err := rte.SendData(outputData); err != nil { + if err := rteEnv.SendData(outputData); err != nil { slog.Error(fmt.Sprintf("greeting [%s] send failed, err: %v", greeting, err), logTag) } else { slog.Info(fmt.Sprintf("greeting [%s] sent", greeting), logTag) } } - rte.OnStartDone() + rteEnv.OnStartDone() } // OnCmd receives cmd from rte graph. 
@@ -202,13 +202,14 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { // example: // {"name": "flush"} func (p *openaiChatGPTExtension) OnCmd( - rte rtego.Rte, - cmd rtego.Cmd, + rteEnv rte.RteEnv, + cmd rte.Cmd, ) { - cmdName, err := cmd.CmdName() + cmdName, err := cmd.GetName() if err != nil { slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } slog.Info(fmt.Sprintf("OnCmd %s", cmdInFlush), logTag) @@ -220,21 +221,24 @@ func (p *openaiChatGPTExtension) OnCmd( wg.Wait() // wait for chat completion stream to finish // send out - outCmd, err := rtego.NewCmd(cmdOutFlush) + outCmd, err := rte.NewCmd(cmdOutFlush) if err != nil { slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } - if err := rte.SendCmd(outCmd, nil); err != nil { + if err := rteEnv.SendCmd(outCmd, nil); err != nil { slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } else { slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) } } - rte.ReturnString(rtego.Ok, "ok", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Ok) + rteEnv.ReturnResult(cmdResult, cmd) } // OnData receives data from rte graph. @@ -243,8 +247,8 @@ func (p *openaiChatGPTExtension) OnCmd( // example: // {"name": "text_data", "properties": {"text": "hello", "is_final": true} func (p *openaiChatGPTExtension) OnData( - rte rtego.Rte, - data rtego.Data, + rteEnv rte.RteEnv, + data rte.Data, ) { // Get isFinal isFinal, err := data.GetPropertyBool(dataInTextDataPropertyIsFinal) @@ -339,14 +343,14 @@ func (p *openaiChatGPTExtension) OnData( slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] got sentence: [%s]", inputText, sentence), logTag) // send sentence - outputData, err := rtego.NewData("text_data") + outputData, err := rte.NewData("text_data") if err != nil { slog.Error(fmt.Sprintf("NewData failed, err: %v", err), logTag) break } outputData.SetProperty(dataOutTextDataPropertyText, sentence) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, false) - if err := rte.SendData(outputData); err != nil { + if err := rteEnv.SendData(outputData); err != nil { slog.Error(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] send sentence [%s] failed, err: %v", inputText, sentence, err), logTag) break } else { @@ -369,10 +373,10 @@ func (p *openaiChatGPTExtension) OnData( } // send end of segment - outputData, _ := rtego.NewData("text_data") + outputData, _ := rte.NewData("text_data") outputData.SetProperty(dataOutTextDataPropertyText, sentence) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) - if err := rte.SendData(outputData); err != nil { + if err := rteEnv.SendData(outputData); err != nil { slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] send failed, err: %v", inputText, sentence, err), logTag) } else { slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] sent", inputText, sentence), logTag) @@ -384,8 +388,8 @@ func init() { slog.Info("init") // Register addon - rtego.RegisterAddonAsExtension( + 
rte.RegisterAddonAsExtension( "openai_chatgpt", - rtego.NewDefaultExtensionAddon(newChatGPTExtension), + rte.NewDefaultExtensionAddon(newChatGPTExtension), ) } diff --git a/agents/addon/extension/openai_chatgpt_python/__init__.py b/agents/addon/extension/openai_chatgpt_python/__init__.py new file mode 100644 index 00000000..42c4cd12 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/__init__.py @@ -0,0 +1,4 @@ +from . import openai_chatgpt_addon +from .log import logger + +logger.info("openai_chatgpt_python extension loaded") diff --git a/agents/addon/extension/openai_chatgpt_python/log.py b/agents/addon/extension/openai_chatgpt_python/log.py new file mode 100644 index 00000000..fa2202da --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("openai_chatgpt_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/openai_chatgpt_python/manifest.json b/agents/addon/extension/openai_chatgpt_python/manifest.json new file mode 100644 index 00000000..9fc1b362 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/manifest.json @@ -0,0 +1,83 @@ +{ + "type": "extension", + "name": "openai_chatgpt_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "frequency_penalty": { + "type": "float64" + }, + "presence_penalty": { + "type": "float64" + }, + "temperature": { + "type": "float64" + }, + "top_p": { + "type": "float64" + }, + "model": { + "type": "string" + }, + "max_tokens": { + "type": "int64" + }, + "base_url": { + "type": "string" + }, + "prompt": { + "type": "string" + }, + "greeting": { + "type": "string" + }, + "proxy_url": { + "type": "string" + }, + "max_memory_length": { + "type": "int64" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py new file mode 100644 index 00000000..5ad5b6cc --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py @@ -0,0 +1,91 @@ +import random +import requests +from openai import OpenAI +from typing import List, Dict, Any, Optional +from .log import logger + + +class OpenAIChatGPTConfig: + def __init__(self, + base_url: str, + api_key: str, + model: str, + prompt: str, + frequency_penalty: float, + presence_penalty: float, + top_p: float, + temperature: float, + max_tokens: int, + seed: Optional[int] = None, + proxy_url: Optional[str] = None): + self.base_url = base_url + self.api_key = api_key + self.model = model + self.prompt = prompt + self.frequency_penalty = frequency_penalty + self.presence_penalty = presence_penalty + self.top_p = top_p + self.temperature = temperature + self.max_tokens = max_tokens + self.seed = seed if seed is not None else 
random.randint(0, 10000)
+        self.proxy_url = proxy_url
+
+    @classmethod
+    def default_config(cls):
+        return cls(
+            base_url="https://api.openai.com/v1",
+            api_key="",
+            model="gpt-4",  # Adjust this to match the equivalent of `openai.GPT4o` in the Python library
+            prompt="You are a voice assistant who talks in a conversational way and can chat with me like my friends. I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. When giving me a list of options, express the options in a narrative way instead of bullet points.",
+            frequency_penalty=0.9,
+            presence_penalty=0.9,
+            top_p=1.0,
+            temperature=0.1,
+            max_tokens=512,
+            seed=random.randint(0, 10000),
+            proxy_url=""
+        )
+
+
+class OpenAIChatGPT:
+    client = None
+
+    def __init__(self, config: OpenAIChatGPTConfig):
+        self.config = config
+        logger.info("OpenAIChatGPT initialized")
+        # openai>=1.x performs requests through its own httpx client, so a
+        # requests.Session attached to the client would be ignored; assuming
+        # the httpx pulled in by the openai package, route the proxy (when
+        # configured) through it directly.
+        import httpx
+
+        http_client = (
+            httpx.Client(proxies=config.proxy_url) if config.proxy_url else None
+        )
+        self.client = OpenAI(
+            api_key=config.api_key,
+            base_url=config.base_url,
+            http_client=http_client,
+        )
+
+    def get_chat_completions_stream(self, messages):
+        req = {
+            "model": self.config.model,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": self.config.prompt,
+                },
+                *messages,
+            ],
+            "temperature": self.config.temperature,
+            "top_p": self.config.top_p,
+            "presence_penalty": self.config.presence_penalty,
+            "frequency_penalty": self.config.frequency_penalty,
+            "max_tokens": self.config.max_tokens,
+            "seed": self.config.seed,
+            "stream": True,
+        }
+
+        try:
+            response = self.client.chat.completions.create(**req)
+            return response
+        except Exception as e:
+            raise Exception(f"CreateChatCompletionStream failed, err: {e}")
\ No newline at end of file
diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py
new file mode 100644
index 00000000..537eda5b
--- /dev/null
+++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py
@@ -0,0 +1,33 @@
+#
+#
+# Agora Real Time Engagement
+# Created by Wei Hu in 2024-05.
+# Copyright (c) 2024 Agora IO. All rights reserved.
+#
+#
+from rte import (
+    Addon,
+    register_addon_as_extension,
+    RteEnv,
+)
+from .log import logger
+
+
+@register_addon_as_extension("openai_chatgpt_python")
+class OpenAIChatGPTExtensionAddon(Addon):
+    def on_init(self, rte: RteEnv, manifest, property) -> None:
+        logger.info("OpenAIChatGPTExtensionAddon on_init")
+        rte.on_init_done(manifest, property)
+        return
+
+    def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None:
+        logger.info("on_create_instance")
+
+        from .openai_chatgpt_extension import OpenAIChatGPTExtension
+
+        rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context)
+
+    def on_deinit(self, rte: RteEnv) -> None:
+        logger.info("OpenAIChatGPTExtensionAddon on_deinit")
+        rte.on_deinit_done()
+        return
diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py
new file mode 100644
index 00000000..6cabe6cb
--- /dev/null
+++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py
@@ -0,0 +1,406 @@
+#
+#
+# Agora Real Time Engagement
+# Created by Wei Hu in 2024-05.
+# Copyright (c) 2024 Agora IO. All rights reserved.
+#
+#
+from .openai_chatgpt import OpenAIChatGPT, OpenAIChatGPTConfig
+from datetime import datetime
+from threading import Thread
+from rte import (
+    Addon,
+    Extension,
+    register_addon_as_extension,
+    RteEnv,
+    Cmd,
+    Data,
+    StatusCode,
+    CmdResult,
+    MetadataInfo,
+)
+from .log import logger
+
+
+CMD_IN_FLUSH = "flush"
+CMD_OUT_FLUSH = "flush"
+DATA_IN_TEXT_DATA_PROPERTY_TEXT = "text"
+DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL = "is_final"
+DATA_OUT_TEXT_DATA_PROPERTY_TEXT = "text"
+DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT = "end_of_segment"
+
+PROPERTY_BASE_URL = "base_url"  # Optional
+PROPERTY_API_KEY = "api_key"  # Required
+PROPERTY_MODEL = "model"  # Optional
+PROPERTY_PROMPT = "prompt"  # Optional
+PROPERTY_FREQUENCY_PENALTY = "frequency_penalty"  # Optional
+PROPERTY_PRESENCE_PENALTY = "presence_penalty"  # Optional
+PROPERTY_TEMPERATURE = "temperature"  # Optional
+PROPERTY_TOP_P = "top_p"  # Optional
+PROPERTY_MAX_TOKENS = "max_tokens"  # Optional
+PROPERTY_GREETING = "greeting"  # Optional
+PROPERTY_PROXY_URL = "proxy_url"  # Optional
+PROPERTY_MAX_MEMORY_LENGTH = "max_memory_length"  # Optional
+
+
+def get_current_time():
+    # Get the current time
+    start_time = datetime.now()
+    # Get the number of microseconds since the Unix epoch
+    unix_microseconds = int(start_time.timestamp() * 1_000_000)
+    return unix_microseconds
+
+
+def is_punctuation(char):
+    if char in [",", "，", ".", "。", "?", "？", "!", "！"]:
+        return True
+    return False
+
+
+def parse_sentence(sentence, content):
+    remain = ""
+    found_punc = False
+
+    for char in content:
+        if not found_punc:
+            sentence += char
+        else:
+            remain += char
+
+        if not found_punc and is_punctuation(char):
+            found_punc = True
+
+    return sentence, remain, found_punc
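+
+# Worked example (illustrative): parse_sentence("", "Hi, there") returns
+# ("Hi,", " there", True) -- characters accumulate into the sentence until the
+# first punctuation mark, and the remainder is fed back in with the next
+# streamed delta, re-segmenting the LLM output into speakable sentences.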
+
+
+class OpenAIChatGPTExtension(Extension):
+    memory = []
+    max_memory_length = 10
+    outdate_ts = 0
+    openai_chatgpt = None
+
+    def on_init(
+        self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo
+    ) -> None:
+        logger.info("OpenAIChatGPTExtension on_init")
+        rte.on_init_done(manifest, property)
+
+    def on_start(self, rte: RteEnv) -> None:
+        logger.info("OpenAIChatGPTExtension on_start")
+        # Prepare configuration
+        openai_chatgpt_config = OpenAIChatGPTConfig.default_config()
+
+        try:
+            base_url = rte.get_property_string(PROPERTY_BASE_URL)
+            if base_url:
+                openai_chatgpt_config.base_url = base_url
+        except Exception as err:
+            logger.info(f"GetProperty optional {PROPERTY_BASE_URL} failed, err: {err}")
+
+        try:
+            api_key = rte.get_property_string(PROPERTY_API_KEY)
+            openai_chatgpt_config.api_key = api_key
+        except Exception as err:
+            logger.info(f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}")
+            return
+
+        try:
+            model = rte.get_property_string(PROPERTY_MODEL)
+            if model:
+                openai_chatgpt_config.model = model
+        except Exception as err:
+            logger.info(f"GetProperty optional {PROPERTY_MODEL} error: {err}")
+
+        try:
+            prompt = rte.get_property_string(PROPERTY_PROMPT)
+            if prompt:
+                openai_chatgpt_config.prompt = prompt
+        except Exception as err:
+            logger.info(f"GetProperty optional {PROPERTY_PROMPT} error: {err}")
+
+        try:
+            frequency_penalty = rte.get_property_float(PROPERTY_FREQUENCY_PENALTY)
+            openai_chatgpt_config.frequency_penalty = float(frequency_penalty)
+        except Exception as err:
+            logger.info(
+                f"GetProperty optional {PROPERTY_FREQUENCY_PENALTY} failed, err: {err}"
+            )
+
+        try:
+            presence_penalty = rte.get_property_float(PROPERTY_PRESENCE_PENALTY)
+            openai_chatgpt_config.presence_penalty = float(presence_penalty)
+        except Exception as err:
+            logger.info(
+                f"GetProperty optional {PROPERTY_PRESENCE_PENALTY} failed, err: {err}"
+            )
+
+        try:
+            temperature = rte.get_property_float(PROPERTY_TEMPERATURE)
+            openai_chatgpt_config.temperature = float(temperature)
+        except Exception as err:
+            logger.info(
+                f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}"
+            )
+
+        try:
+            top_p = rte.get_property_float(PROPERTY_TOP_P)
+            openai_chatgpt_config.top_p = float(top_p)
+        except Exception as err:
+            logger.info(f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}")
+
+        try:
+            max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS)
+            if max_tokens > 0:
+                openai_chatgpt_config.max_tokens = int(max_tokens)
+        except Exception as err:
+            logger.info(
+                f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}"
+            )
+
+        try:
+            proxy_url = rte.get_property_string(PROPERTY_PROXY_URL)
+            openai_chatgpt_config.proxy_url = proxy_url
+        except Exception as err:
+            logger.info(f"GetProperty optional {PROPERTY_PROXY_URL} failed, err: {err}")
+
+        # default to no greeting so the later check cannot hit an unbound name
+        greeting = None
+        try:
+            greeting = rte.get_property_string(PROPERTY_GREETING)
+        except Exception as err:
+            logger.info(f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}")
+
+        try:
+            prop_max_memory_length = rte.get_property_int(PROPERTY_MAX_MEMORY_LENGTH)
+            if prop_max_memory_length > 0:
+                self.max_memory_length = int(prop_max_memory_length)
+        except Exception as err:
+            logger.info(
+                f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}"
+            )
+
+        # Create openaiChatGPT instance
+        try:
+            self.openai_chatgpt = OpenAIChatGPT(openai_chatgpt_config)
+            logger.info(
+                f"newOpenaiChatGPT succeed with max_tokens: {openai_chatgpt_config.max_tokens}, model: {openai_chatgpt_config.model}"
+            )
+        except Exception as err:
+            logger.info(f"newOpenaiChatGPT failed, err: {err}")
+
+        # Send greeting if available
+        if greeting:
+            try:
+                output_data = Data.create("text_data")
+                output_data.set_property_string(
+                    DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting
+                )
+                output_data.set_property_bool(
+                    DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True
+                )
+                rte.send_data(output_data)
+                logger.info(f"greeting [{greeting}] sent")
+            except Exception as err:
+                logger.info(f"greeting [{greeting}] send failed, err: {err}")
+        rte.on_start_done()
+
+    def on_stop(self, rte: RteEnv) -> None:
+        logger.info("OpenAIChatGPTExtension on_stop")
rte.on_stop_done() + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("OpenAIChatGPTExtension on_deinit") + rte.on_deinit_done() + + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: + logger.info("OpenAIChatGPTExtension on_cmd") + cmd_json = cmd.to_json() + logger.info("OpenAIChatGPTExtension on_cmd json: " + cmd_json) + + cmd_name = cmd.get_name() + + if cmd_name == CMD_IN_FLUSH: + self.outdate_ts = get_current_time() + cmd_out = Cmd.create(CMD_OUT_FLUSH) + rte.send_cmd(cmd_out, None) + logger.info(f"OpenAIChatGPTExtension on_cmd sent flush") + else: + logger.info(f"OpenAIChatGPTExtension on_cmd unknown cmd: {cmd_name}") + cmd_result = CmdResult.create(StatusCode.ERROR) + cmd_result.set_property_string("detail", "unknown cmd") + rte.return_result(cmd_result, cmd) + return + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + rte.return_result(cmd_result, cmd) + + def on_data(self, rte: RteEnv, data: Data) -> None: + """ + on_data receives data from rte graph. + current supported data: + - name: text_data + example: + {name: text_data, properties: {text: "hello"} + """ + logger.info(f"OpenAIChatGPTExtension on_data") + + # Assume 'data' is an object from which we can get properties + try: + is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) + if not is_final: + logger.info("ignore non-final input") + return + except Exception as err: + logger.info( + f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" + ) + return + + # Get input text + try: + input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) + if not input_text: + logger.info("ignore empty text") + return + logger.info(f"OnData input text: [{input_text}]") + except Exception as err: + logger.info( + f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" + ) + return + + # Prepare memory + if len(self.memory) > self.max_memory_length: + self.memory.pop(0) + self.memory.append({"role": "user", "content": input_text}) + + def chat_completions_stream_worker(start_time, input_text, memory): + try: + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] memory: {memory}" + ) + + # Get result from AI + resp = self.openai_chatgpt.get_chat_completions_stream(memory) + if resp is None: + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] failed" + ) + return + + sentence = "" + full_content = "" + first_sentence_sent = False + + for chat_completions in resp: + if start_time < self.outdate_ts: + logger.info( + f"GetChatCompletionsStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}" + ) + break + + if ( + len(chat_completions.choices) > 0 + and chat_completions.choices[0].delta.content is not None + ): + content = chat_completions.choices[0].delta.content + else: + content = "" + + full_content += content + + while True: + sentence, content, sentence_is_final = parse_sentence( + sentence, content + ) + if len(sentence) == 0 or not sentence_is_final: + logger.info(f"sentence {sentence} is empty or not final") + break + logger.info( + f"GetChatCompletionsStream recv for input text: [{input_text}] got sentence: [{sentence}]" + ) + + # send sentence + try: + output_data = Data.create("text_data") + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False + ) + rte.send_data(output_data) + 
logger.info( + f"GetChatCompletionsStream recv for input text: [{input_text}] sent sentence [{sentence}]" + ) + except Exception as err: + logger.info( + f"GetChatCompletionsStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}" + ) + break + + sentence = "" + if not first_sentence_sent: + first_sentence_sent = True + logger.info( + f"GetChatCompletionsStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms" + ) + + # remember response as assistant content in memory + memory.append({"role": "assistant", "content": full_content}) + + # send end of segment + try: + output_data = Data.create("text_data") + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True + ) + rte.send_data(output_data) + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent" + ) + except Exception as err: + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}" + ) + + except Exception as e: + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] failed, err: {e}" + ) + + # Start thread to request and read responses from OpenAI + start_time = get_current_time() + thread = Thread( + target=chat_completions_stream_worker, + args=(start_time, input_text, self.memory), + ) + thread.start() + logger.info(f"OpenAIChatGPTExtension on_data end") + # try: + # rte_data = Data.create("text_data") + # rte_data.set_property_string("text", "hello, world, who are you!") + # except Exception as e: + # logger.info(f"on_data new_data error, ", e) + # return + + # rte.send_data(rte_data) + + +@register_addon_as_extension("openai_chatgpt_python") +class OpenAIChatGPTExtensionAddon(Addon): + def on_init(self, rte: RteEnv, manifest, property) -> None: + logger.info("OpenAIChatGPTExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: + logger.info("on_create_instance") + rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context) + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("OpenAIChatGPTExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/openai_chatgpt_python/property.json b/agents/addon/extension/openai_chatgpt_python/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt_python/requirements.txt b/agents/addon/extension/openai_chatgpt_python/requirements.txt new file mode 100644 index 00000000..de1e7f46 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/requirements.txt @@ -0,0 +1,2 @@ +openai==1.35.13 +requests==2.32.3 \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/__init__.py b/agents/addon/extension/qwen_llm_python/__init__.py new file mode 100644 index 00000000..b5b5c7c2 --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/__init__.py @@ -0,0 +1,3 @@ +from . 
import qwen_llm_addon + +print("qwen_llm_python extension loaded") diff --git a/agents/addon/extension/qwen_llm_python/log.py b/agents/addon/extension/qwen_llm_python/log.py new file mode 100644 index 00000000..98c23228 --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("qwen_llm_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/qwen_llm_python/manifest.json b/agents/addon/extension/qwen_llm_python/manifest.json new file mode 100644 index 00000000..02b7a98e --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/manifest.json @@ -0,0 +1,71 @@ +{ + "type": "extension", + "name": "qwen_llm_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "model": { + "type": "string" + }, + "max_tokens": { + "type": "int64" + }, + "prompt": { + "type": "string" + }, + "greeting": { + "type": "string" + }, + "max_memory_length": { + "type": "int64" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + } + } + } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "end_of_segment": { + "type": "bool" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/property.json b/agents/addon/extension/qwen_llm_python/property.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py b/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py new file mode 100644 index 00000000..b635f64e --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py @@ -0,0 +1,33 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +from rte import ( + Addon, + register_addon_as_extension, + RteEnv, +) +from .log import logger + + +@register_addon_as_extension("qwen_llm_python") +class QWenLLMExtensionAddon(Addon): + def on_init(self, rte: RteEnv, manifest, property) -> None: + logger.info("QWenLLMExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: RteEnv, addon_name: str, context): + logger.info("on_create_instance") + + from .qwen_llm_extension import QWenLLMExtension + + rte.on_create_instance_done(QWenLLMExtension(addon_name), context) + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("QWenLLMExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py b/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py new file mode 100644 index 00000000..1b34e20b --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py @@ -0,0 +1,249 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. 
+# Copyright (c) 2024 Agora IO. All rights reserved.
+#
+#
+from rte import (
+    Extension,
+    RteEnv,
+    Cmd,
+    Data,
+    StatusCode,
+    CmdResult,
+    MetadataInfo,
+)
+from typing import List, Any
+import dashscope
+import queue
+from datetime import datetime
+import threading
+from http import HTTPStatus
+from .log import logger
+
+
+def isEnd(content: str) -> bool:
+    last = content[len(content) - 1]
+    return (
+        last == ","
+        or last == "，"
+        or last == "."
+        or last == "。"
+        or last == "?"
+        or last == "？"
+        or last == "!"
+        or last == "！"
+    )
+
+
+class QWenLLMExtension(Extension):
+    def __init__(self, name: str):
+        super().__init__(name)
+        self.history = []
+        self.api_key = ""
+        self.model = ""
+        self.prompt = ""
+        self.max_history = 10
+        self.stopped = False
+        self.thread = None
+        self.outdateTs = datetime.now()
+        self.ongoing = ""
+
+        self.queue = queue.Queue()
+        self.mutex = threading.Lock()
+
+    def on_msg(self, role: str, content: str) -> None:
+        self.mutex.acquire()
+        try:
+            self.history.append({"role": role, "content": content})
+            if len(self.history) > self.max_history:
+                self.history = self.history[1:]
+        finally:
+            self.mutex.release()
+
+    def get_messages(self) -> List[Any]:
+        messages = []
+        if len(self.prompt) > 0:
+            messages.append({"role": "system", "content": self.prompt})
+        self.mutex.acquire()
+        try:
+            for h in self.history:
+                messages.append(h)
+        finally:
+            self.mutex.release()
+        return messages
+
+    def need_interrupt(self, ts: datetime) -> bool:
+        return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1
+
+    def call(self, messages: List[Any]):
+        logger.info("before call %s", messages)
+        response = dashscope.Generation.call(
+            self.model,
+            messages=messages,
+            result_format="message",  # set the result to be "message" format.
+            stream=False,  # non-streaming call
+            incremental_output=False,  # full output in a single response
+        )
+        if response.status_code == HTTPStatus.OK:
+            self.on_msg(
+                response.output.choices[0]["message"]["role"],
+                response.output.choices[0]["message"]["content"],
+            )
+            logger.info(
+                "on response %s", response.output.choices[0]["message"]["content"]
+            )
+        else:
+            logger.info("Failed to get response %s", response)
+
+    def call_with_stream(
+        self, rte: RteEnv, ts: datetime, inputText: str, messages: List[Any]
+    ):
+        if self.need_interrupt(ts):
+            logger.warning("out of date, %s, %s", self.outdateTs, ts)
+            return
+        if len(self.ongoing) > 0:
+            messages.append({"role": "assistant", "content": self.ongoing})
+        messages.append({"role": "user", "content": inputText})
+        logger.info("before call %s %s", messages, ts)
+
+        responses = dashscope.Generation.call(
+            self.model,
+            messages=messages,
+            result_format="message",  # set the result to be "message" format.
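+            # stream=True plus incremental_output=True (below) makes dashscope
+            # yield partial deltas; call_with_stream re-segments them into
+            # sentences before forwarding each one downstream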
+ stream=True, # set streaming output + incremental_output=True, # get streaming output incrementally + ) + total = "" + partial = "" + for response in responses: + if self.need_interrupt(ts): + if len(self.ongoing) > 0: + self.on_msg("user", inputText) + self.on_msg("assistant", self.ongoing) + self.ongoing = "" + logger.warning("out of date, %s, %s", self.outdateTs, ts) + return + if response.status_code == HTTPStatus.OK: + temp = response.output.choices[0]["message"]["content"] + if len(temp) == 0: + continue + partial += temp + self.ongoing += temp + if (isEnd(temp) and len(partial) > 10) or len(partial) > 50: + d = Data.create("text_data") + d.set_property_bool("end_of_segment", isEnd(partial)) + d.set_property_string("text", partial) + rte.send_data(d) + total += partial + partial = "" + else: + logger.info( + "Request id: %s, Status code: %s, error code: %s, error message: %s" + % ( + response.request_id, + response.status_code, + response.code, + response.message, + ) + ) + return + if len(partial) > 0: + d = Data.create("text_data") + d.set_property_bool("end_of_segment", True) + d.set_property_string("text", partial) + rte.send_data(d) + total += partial + partial = "" + self.ongoing = "" + self.on_msg("user", inputText) + self.on_msg("assistant", total) + logger.info("on response %s", total) + + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: + logger.info("QWenLLMExtension on_init") + rte.on_init_done(manifest, property) + + def on_start(self, rte: RteEnv) -> None: + logger.info("QWenLLMExtension on_start") + self.api_key = rte.get_property_string("api_key") + self.model = rte.get_property_string("model") + self.prompt = rte.get_property_string("prompt") + self.max_history = rte.get_property_int("max_memory_length") + + dashscope.api_key = self.api_key + self.thread = threading.Thread(target=self.async_handle, args=[rte]) + self.thread.start() + rte.on_start_done() + + def on_stop(self, rte: RteEnv) -> None: + logger.info("QWenLLMExtension on_stop") + self.stopped = True + self.queue.put(None) + self.flush() + self.thread.join() + rte.on_stop_done() + + def on_deinit(self, rte: RteEnv) -> None: + logger.info("QWenLLMExtension on_deinit") + rte.on_deinit_done() + + def flush(self): + logger.info("QWenLLMExtension flush") + while not self.queue.empty(): + self.queue.get() + + def on_data(self, rte: RteEnv, data: Data) -> None: + logger.info("QWenLLMExtension on_data") + is_final = data.get_property_bool("is_final") + if not is_final: + logger.info("ignore non final") + return + + inputText = data.get_property_string("text") + if len(inputText) == 0: + logger.info("ignore empty text") + return + + ts = datetime.now() + + logger.info("on data %s, %s", inputText, ts) + self.queue.put((inputText, ts)) + + def async_handle(self, rte: RteEnv): + while not self.stopped: + try: + value = self.queue.get() + if value is None: + break + inputText, ts = value + if self.need_interrupt(ts): + continue + logger.info("fetch from queue %s", inputText) + history = self.get_messages() + self.call_with_stream(rte, ts, inputText, history) + except Exception as e: + logger.exception(e) + + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: + logger.info("QWenLLMExtension on_cmd") + cmd_json = cmd.to_json() + logger.info("QWenLLMExtension on_cmd json: %s", cmd_json) + + cmdName = cmd.get_name() + if cmdName == "flush": + self.outdateTs = datetime.now() + # self.flush() + cmd_out = Cmd.create("flush") + rte.send_cmd( + cmd_out, + lambda rte, result: 
print("QWenLLMExtensionAddon send_cmd done"), + ) + else: + logger.info("unknown cmd %s", cmdName) + + cmd_result = CmdResult.create(StatusCode.OK) + rte.return_result(cmd_result, cmd) diff --git a/agents/addon/extension/qwen_llm_python/requirements.txt b/agents/addon/extension/qwen_llm_python/requirements.txt new file mode 100644 index 00000000..f1c09c9e --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/requirements.txt @@ -0,0 +1 @@ +dashscope==1.20.0 \ No newline at end of file diff --git a/agents/main.go b/agents/main.go index 5a2fd1ef..cf91296d 100644 --- a/agents/main.go +++ b/agents/main.go @@ -11,7 +11,7 @@ import ( "flag" "log" - "agora.io/rte/rtego" + "agora.io/rte/rte" ) type appConfig struct { @@ -19,26 +19,26 @@ type appConfig struct { } type defaultApp struct { - rtego.DefaultApp + rte.DefaultApp cfg *appConfig } func (p *defaultApp) OnInit( - rte rtego.Rte, - manifest rtego.MetadataInfo, - property rtego.MetadataInfo, + rteEnv rte.RteEnv, + manifest rte.MetadataInfo, + property rte.MetadataInfo, ) { // Using the default manifest.json if not specified. if len(p.cfg.Manifest) > 0 { - manifest.Set(rtego.MetadataTypeJSONFileName, p.cfg.Manifest) + manifest.Set(rte.MetadataTypeJSONFileName, p.cfg.Manifest) } - rte.OnInitDone(manifest, property) + rteEnv.OnInitDone(manifest, property) } func startAppBlocking(cfg *appConfig) { - appInstance, err := rtego.NewApp(&defaultApp{ + appInstance, err := rte.NewApp(&defaultApp{ cfg: cfg, }) if err != nil { @@ -47,9 +47,9 @@ func startAppBlocking(cfg *appConfig) { appInstance.Run(true) appInstance.Wait() - rtego.UnloadAllAddons() + rte.UnloadAllAddons() - rtego.EnsureCleanupWhenProcessExit() + rte.EnsureCleanupWhenProcessExit() } func setDefaultLog() { diff --git a/agents/main.py b/agents/main.py new file mode 100644 index 00000000..4a4d297c --- /dev/null +++ b/agents/main.py @@ -0,0 +1,68 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +from glob import glob +import importlib.util +import os +import argparse +from os.path import dirname + +def log(msg): + print("[PYTHON] {}".format(msg)) + +def process_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--manifest", help="The absolute path of manifest.json" + ) + return parser.parse_args() + +if __name__ == "__main__": + args = process_args() + + basedir = dirname(__file__) + log("app init") + + for module in glob(os.path.join(basedir, "addon/extension/*")): + if os.path.isdir(module): + module_name = os.path.basename(module) + spec = importlib.util.find_spec( + "addon.extension.{}".format(module_name) + ) + if spec is not None: + mod = importlib.import_module( + "addon.extension.{}".format(module_name) + ) + print("imported module: {}".format(module_name)) + + from rte_runtime_python import App, MetadataType + class TestApp(App): + def on_init(self, rte, manifest, property): + log("app on_init") + + # Using the default manifest.json if not specified. 
+ if self.manifest_path: + log("set manifest: {}".format(self.manifest_path)) + manifest.set(MetadataType.JSON_FILENAME, self.manifest_path) + + rte.on_init_done(manifest, property) + + def on_deinit(self, rte) -> None: + log("app on_deinit") + rte.on_deinit_done() + + def set_manifest_path(self, manifest_path): + self.manifest_path = manifest_path + + app = TestApp() + app.set_manifest_path(args.manifest) + log("app created") + + app.run(False) + log("app run done") diff --git a/agents/manifest.json.cn.bedrock.example b/agents/manifest.json.cn.bedrock.example new file mode 100644 index 00000000..0a8ae550 --- /dev/null +++ b/agents/manifest.json.cn.bedrock.example @@ -0,0 +1,210 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.3.0-rc1" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "bedrock", + "addon": "bedrock_llm_python", + "name": "bedrock_llm", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "model": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "max_tokens": 512, + "prompt": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "", + "azure_subscription_region": "", + "azure_synthesis_voice_name": "en-US-JaneNeural" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "bedrock" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + } + ] + } + ] +} diff --git a/agents/manifest.json.cn.example b/agents/manifest.json.cn.example new file mode 100644 index 00000000..4127fc02 --- /dev/null +++ b/agents/manifest.json.cn.example @@ -0,0 +1,269 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.3.0-rc1" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "zh-CN", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "llm", + "addon": "qwen_llm_python", + "name": "qwen_llm", + "property": { + "api_key": "", + "model": 
"qwen-max", + "max_tokens": 512, + "prompt": "", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cosy_tts", + "name": "cosy_tts", + "property": { + "api_key": "", + "model": "cosyvoice-v1", + "voice": "longxiaochun", + "sample_rate": 16000 + } + }, + { + "type": "extension", + "extension_group": "chat_transcriber", + "addon": "chat_transcriber_python", + "name": "chat_transcriber" + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "llm" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chat_transcriber" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ], + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "llm", + "extension": "qwen_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cosy_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest":[ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + } + ] + } + ] +} diff --git a/agents/manifest.json.cn.openai.example b/agents/manifest.json.cn.openai.example new file mode 100644 index 00000000..3f0625d6 --- /dev/null +++ b/agents/manifest.json.cn.openai.example @@ -0,0 +1,212 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, + { + "type": 
"extension_group", + "name": "default_extension_group", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.3.0-rc1" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt_python", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "", + "frequency_penalty": 0.9, + "model": "gpt-3.5-turbo", + "max_tokens": 512, + "prompt": "", + "proxy_url": "", + "greeting": "ASTRA agent connected. How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cosy_tts", + "name": "cosy_tts", + "property": { + "api_key": "", + "model": "cosyvoice-v1", + "voice": "longxiaochun", + "sample_rate": 16000 + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cosy_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + } + ] +} diff --git a/agents/manifest.json.elevenlabs.example b/agents/manifest.json.elevenlabs.example index 0f2ee255..e4e45ae6 100644 --- a/agents/manifest.json.elevenlabs.example +++ b/agents/manifest.json.elevenlabs.example @@ -5,34 +5,29 @@ "language": "go", "dependencies": [ { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.1.0" + "type": "system", + "name": "rte_runtime_python", 
+ "version": "0.3.1" }, { - "type": "extension", - "name": "agora_rtc", - "version": "0.1.1" + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" }, { "type": "extension", - "name": "chat_transcriber", - "version": "0.1.0" + "name": "py_init_extension_cpp", + "version": "0.3.1" }, { - "type": "extension", - "name": "elevenlabs_tts", - "version": "0.1.0" - }, - { - "type": "extension", - "name": "interrupt_detector", - "version": "0.1.0" + "type": "extension_group", + "name": "default_extension_group", + "version": "0.3.1" }, { "type": "extension", - "name": "openai_chatgpt", - "version": "0.1.0" + "name": "agora_rtc", + "version": "0.3.0-rc1" } ], "predefined_graphs": [ diff --git a/agents/manifest.json.en.bedrock.example b/agents/manifest.json.en.bedrock.example new file mode 100644 index 00000000..0a8ae550 --- /dev/null +++ b/agents/manifest.json.en.bedrock.example @@ -0,0 +1,210 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.3.0-rc1" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "bedrock", + "addon": "bedrock_llm_python", + "name": "bedrock_llm", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "model": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "max_tokens": 512, + "prompt": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "", + "azure_subscription_region": "", + "azure_synthesis_voice_name": "en-US-JaneNeural" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "bedrock" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + } + ] + } + ] +} diff --git a/agents/manifest.json.en.example b/agents/manifest.json.en.example new file mode 100644 index 00000000..8bbd42ba --- /dev/null +++ b/agents/manifest.json.en.example @@ -0,0 +1,265 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.3.0-rc1" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector", + "name": "interrupt_detector" + }, + { + "type": "extension", 
+ "extension_group": "chatgpt", + "addon": "openai_chatgpt", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "", + "frequency_penalty": 0.9, + "model": "gpt-3.5-turbo", + "max_tokens": 512, + "prompt": "", + "proxy_url": "", + "greeting": "ASTRA agent connected. How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "", + "azure_subscription_region": "", + "azure_synthesis_voice_name": "en-US-JaneNeural" + } + }, + { + "type": "extension", + "extension_group": "transcriber", + "addon": "chat_transcriber", + "name": "chat_transcriber" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "transcriber" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + } + ] +} diff --git a/agents/manifest.json.example b/agents/manifest.json.example index 0130d4e1..8bbd42ba 100644 --- a/agents/manifest.json.example +++ b/agents/manifest.json.example @@ -1,38 +1,33 @@ { "type": "app", "name": "astra_agents", - "version": "0.1.0", + "version": "0.2.0", "language": "go", "dependencies": [ { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.1.0" + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" }, { - 
"type": "extension", - "name": "agora_rtc", - "version": "0.1.1" + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" }, { "type": "extension", - "name": "chat_transcriber", - "version": "0.1.0" + "name": "py_init_extension_cpp", + "version": "0.3.1" }, { - "type": "extension", - "name": "azure_tts", - "version": "0.1.0" - }, - { - "type": "extension", - "name": "interrupt_detector", - "version": "0.1.0" + "type": "extension_group", + "name": "default_extension_group", + "version": "0.3.1" }, { "type": "extension", - "name": "openai_chatgpt", - "version": "0.1.0" + "name": "agora_rtc", + "version": "0.3.0-rc1" } ], "predefined_graphs": [ @@ -145,6 +140,17 @@ } ] } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + } + ] + } ] }, { diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.qwen.example new file mode 100644 index 00000000..f7481e23 --- /dev/null +++ b/agents/manifest.json.qwen.example @@ -0,0 +1,269 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.3.0-rc1" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "llm", + "addon": "qwen_llm_python", + "name": "qwen_llm", + "property": { + "api_key": "", + "model": "qwen-max", + "max_tokens": 512, + "prompt": "", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cosy_tts", + "name": "cosy_tts", + "property": { + "api_key": "", + "model": "cosyvoice-v1", + "voice": "longxiaochun", + "sample_rate": 16000 + } + }, + { + "type": "extension", + "extension_group": "chat_transcriber", + "addon": "chat_transcriber_python", + "name": "chat_transcriber" + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "llm" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chat_transcriber" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + 
"extension_group": "chat_transcriber", + "extension": "chat_transcriber" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ], + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "llm", + "extension": "qwen_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cosy_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest":[ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + } + ] + } + ] +} diff --git a/agents/property.json b/agents/property.json index 03c192fc..64810e6a 100644 --- a/agents/property.json +++ b/agents/property.json @@ -2,4 +2,4 @@ "rte": { "log_level": 3 } -} +} \ No newline at end of file diff --git a/agents/scripts/install_deps_and_build.sh b/agents/scripts/install_deps_and_build.sh index 1b0e5acd..9965ac88 100755 --- a/agents/scripts/install_deps_and_build.sh +++ b/agents/scripts/install_deps_and_build.sh @@ -43,6 +43,23 @@ build_cxx_addon() { done } +install_python_requirements() { + local app_dir=$1 + + if [[ -f "requirements.txt" ]]; then + pip install -r requirements.txt + fi + + # traverse the addon/extension directory to find the requirements.txt + if [[ -d "addon/extension" ]]; then + for extension in addon/extension/*; do + if [[ -f "$extension/requirements.txt" ]]; then + pip install -r $extension/requirements.txt + fi + done + fi +} + build_go_app() { local app_dir=$1 cd $app_dir @@ -84,8 +101,12 @@ main() { arpm install # build addons and app + echo "build_cxx_addon..." build_cxx_addon $APP_HOME + echo "build_go_app..." build_go_app $APP_HOME + echo "install_python_requirements..." 
+  install_python_requirements $APP_HOME
 }
 
 main "$@"
diff --git a/agents/scripts/package.sh b/agents/scripts/package.sh
index ba87f7f3..4c3ba1d0 100755
--- a/agents/scripts/package.sh
+++ b/agents/scripts/package.sh
@@ -17,6 +17,21 @@ copy_extension() {
 
   if [[ -f addon/extension/$extension/manifest.json ]]; then
     cp addon/extension/$extension/manifest.json .release/addon/extension/$extension/
+
+    # package .py for python extensions
+    EXTENSION_LANGUAGE=$(jq -r '.language' addon/extension/$extension/manifest.json)
+    if [[ $EXTENSION_LANGUAGE == "python" ]]; then
+      # TODO: package 'publish' contents only
+      cp addon/extension/$extension/*.py .release/addon/extension/$extension/
+      if [[ -f addon/extension/$extension/requirements.txt ]]; then
+        cp addon/extension/$extension/requirements.txt .release/addon/extension/$extension/
+      fi
+
+      # TODO: copy specific contents
+      if [[ -d addon/extension/$extension/pb ]]; then
+        cp -r addon/extension/$extension/pb .release/addon/extension/$extension/
+      fi
+    fi
   fi
 
   if [[ -f addon/extension/$extension/property.json ]]; then
@@ -27,17 +42,29 @@ copy_extension() {
 cp -r bin .release
 cp -r lib .release
 cp manifest.json .release
-cp manifest.elevenlabs.json .release
+#cp manifest.elevenlabs.json .release
+cp manifest.cn.json .release
+cp manifest.en.json .release
 cp property.json .release
 
-mkdir .release/addon
+# python deps
+if [[ -d interface/rte ]]; then
+  mkdir -p .release/interface
+  cp -r interface/rte .release/interface
+fi
+
+# extension group
+mkdir -p .release/addon
 cp -r addon/extension_group .release/addon/
-cp -r session_control.conf .release/
 
+# extensions
 mkdir -p .release/addon/extension
-
 for extension in addon/extension/*
 do
   extension_name=$(basename $extension)
   copy_extension $extension_name
 done
+
+if [[ -f session_control.conf ]]; then
+  cp -r session_control.conf .release/
+fi
diff --git a/bedrock_llm/bedrock_llm_extension.go b/bedrock_llm/bedrock_llm_extension.go
new file mode 100644
index 00000000..7c21d706
--- /dev/null
+++ b/bedrock_llm/bedrock_llm_extension.go
@@ -0,0 +1,402 @@
+/**
+ *
+ * Agora Real Time Engagement
+ * Created by lixinhui in 2024.
+ * Copyright (c) 2024 Agora IO. All rights reserved.
+ *
+ */
+// Note that this is just an example extension written in the Go programming
+// language, so the package name does not match the containing directory
+// name, which is not common in Go.
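+//
+// The addon registers itself with the runtime in init() at the bottom of
+// this file via rtego.RegisterAddonAsExtension("bedrock_llm", ...).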
+package extension
+
+import (
+	"fmt"
+	"log/slog"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"agora.io/rte/rtego"
+	"github.com/aws/aws-sdk-go-v2/service/bedrockruntime/types"
+)
+
+var (
+	logTag = slog.String("extension", "BEDROCK_LLM_EXTENSION")
+)
+
+type bedrockLLMExtension struct {
+	rtego.DefaultExtension
+	bedrockLLM *bedrockLLM
+}
+
+const (
+	cmdInFlush                              = "flush"
+	cmdOutFlush                             = "flush"
+	dataInTextDataPropertyText              = "text"
+	dataInTextDataPropertyIsFinal           = "is_final"
+	dataOutTextDataPropertyText             = "text"
+	dataOutTextDataPropertyTextEndOfSegment = "end_of_segment"
+
+	propertyRegion          = "region"            // Optional
+	propertyAccessKey       = "access_key"        // Required
+	propertySecretKey       = "secret_key"        // Required
+	propertyModel           = "model"             // Optional
+	propertyPrompt          = "prompt"            // Optional
+	propertyTemperature     = "temperature"       // Optional
+	propertyTopP            = "top_p"             // Optional
+	propertyMaxTokens       = "max_tokens"        // Optional
+	propertyGreeting        = "greeting"          // Optional
+	propertyMaxMemoryLength = "max_memory_length" // Optional
+)
+
+var (
+	memory          []types.Message
+	memoryChan      chan types.Message
+	maxMemoryLength = 10
+
+	outdateTs atomic.Int64
+	wg        sync.WaitGroup
+)
+
+func newBedrockLLMExtension(name string) rtego.Extension {
+	return &bedrockLLMExtension{}
+}
+
+// OnStart will be called when the extension is starting,
+// properties can be read here to initialize and start the extension.
+// current supported properties:
+// - region (optional)
+// - access_key (required)
+// - secret_key (required)
+// - prompt
+// - temperature
+// - top_p
+// - max_tokens
+// - greeting
+func (p *bedrockLLMExtension) OnStart(rte rtego.Rte) {
+	slog.Info("OnStart", logTag)
+
+	// prepare configuration
+	bedrockLLMConfig := defaultBedrockLLMConfig()
+
+	if accessKey, err := rte.GetPropertyString(propertyAccessKey); err != nil {
+		slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyAccessKey, err), logTag)
+	} else {
+		if len(accessKey) > 0 {
+			bedrockLLMConfig.AccessKey = accessKey
+		}
+	}
+	if secretKey, err := rte.GetPropertyString(propertySecretKey); err != nil {
+		slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertySecretKey, err), logTag)
+	} else {
+		if len(secretKey) > 0 {
+			bedrockLLMConfig.SecretKey = secretKey
+		}
+	}
+
+	if model, err := rte.GetPropertyString(propertyModel); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyModel, err), logTag)
+	} else {
+		if len(model) > 0 {
+			bedrockLLMConfig.Model = model
+		}
+	}
+
+	if prompt, err := rte.GetPropertyString(propertyPrompt); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyPrompt, err), logTag)
+	} else {
+		if len(prompt) > 0 {
+			bedrockLLMConfig.Prompt = prompt
+		}
+	}
+
+	if temperature, err := rte.GetPropertyFloat64(propertyTemperature); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTemperature, err), logTag)
+	} else {
+		bedrockLLMConfig.Temperature = float32(temperature)
+	}
+
+	if topP, err := rte.GetPropertyFloat64(propertyTopP); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTopP, err), logTag)
+	} else {
+		bedrockLLMConfig.TopP = float32(topP)
+	}
+
+	if maxTokens, err := rte.GetPropertyInt64(propertyMaxTokens); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxTokens, err), logTag)
+	} else {
+		if maxTokens > 0 {
+			bedrockLLMConfig.MaxTokens = int32(maxTokens)
+		}
+	}
+
+	greeting, err := rte.GetPropertyString(propertyGreeting)
+	if err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyGreeting, err), logTag)
+	}
+
+	if propMaxMemoryLength, err := rte.GetPropertyInt64(propertyMaxMemoryLength); err != nil {
+		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxMemoryLength, err), logTag)
+	} else {
+		if propMaxMemoryLength > 0 {
+			maxMemoryLength = int(propMaxMemoryLength)
+		}
+	}
+
+	// create bedrockLLM instance
+	bedrockLLM, err := newBedrockLLM(bedrockLLMConfig)
+	if err != nil {
+		slog.Error(fmt.Sprintf("newBedrockLLM failed, err: %v", err), logTag)
+		return
+	}
+	slog.Info(fmt.Sprintf("newBedrockLLM succeed with max_tokens: %d, model: %s",
+		bedrockLLMConfig.MaxTokens, bedrockLLMConfig.Model), logTag)
+
+	p.bedrockLLM = bedrockLLM
+
+	memoryChan = make(chan types.Message, maxMemoryLength*2)
+
+	// send greeting if available
+	if len(greeting) > 0 {
+		outputData, _ := rtego.NewData("text_data")
+		outputData.SetProperty(dataOutTextDataPropertyText, greeting)
+		outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true)
+		if err := rte.SendData(outputData); err != nil {
+			slog.Error(fmt.Sprintf("greeting [%s] send failed, err: %v", greeting, err), logTag)
+		} else {
+			slog.Info(fmt.Sprintf("greeting [%s] sent", greeting), logTag)
+		}
+	}
+
+	rte.OnStartDone()
+}
+
+// OnCmd receives cmd from rte graph.
+// current supported cmd:
+// - name: flush
+//   example:
+//   {"name": "flush"}
+func (p *bedrockLLMExtension) OnCmd(
+	rte rtego.Rte,
+	cmd rtego.Cmd,
+) {
+	cmdName, err := cmd.GetName()
+	if err != nil {
+		result, fatal := rtego.NewCmdResult(rtego.Error)
+
+		if fatal != nil {
+			slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag)
+		}
+		rte.ReturnResult(result, cmd)
+		return
+	}
+	slog.Info(fmt.Sprintf("OnCmd %s", cmdName), logTag)
+
+	switch cmdName {
+	case cmdInFlush:
+		outdateTs.Store(time.Now().UnixMicro())
+
+		wg.Wait() // wait for chat completion stream to finish
+
+		// send out
+		outCmd, err := rtego.NewCmd(cmdOutFlush)
+		if err != nil {
+			result, fatal := rtego.NewCmdResult(rtego.Error)
+
+			if fatal != nil {
+				slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag)
+			}
+			rte.ReturnResult(result, cmd)
+			return
+		}
+		if err := rte.SendCmd(outCmd, nil); err != nil {
+			result, fatal := rtego.NewCmdResult(rtego.Error)
+
+			if fatal != nil {
+				slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag)
+			}
+			rte.ReturnResult(result, cmd)
+			return
+		} else {
+			slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag)
+		}
+	}
+
+	result, _ := rtego.NewCmdResult(rtego.Ok)
+	rte.ReturnResult(result, cmd)
+}
+
+// OnData receives data from rte graph.
+// current supported data:
+// - name: text_data
+//   example:
+//   {"name": "text_data", "properties": {"text": "hello", "is_final": true}}
+func (p *bedrockLLMExtension) OnData(
+	rte rtego.Rte,
+	data rtego.Data,
+) {
+	// Get isFinal
+	isFinal, err := data.GetPropertyBool(dataInTextDataPropertyIsFinal)
+	if err != nil {
+		slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyIsFinal, err), logTag)
+		return
+	}
+	if !isFinal { // ignore non-final
+		slog.Debug("ignore non-final input", logTag)
+		return
+	}
+
+	// Get input text
+	inputText, err := data.GetPropertyString(dataInTextDataPropertyText)
+	if err != nil {
+		slog.Error(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag)
+		return
+	}
+	if len(inputText) == 0 {
+		slog.Debug("ignore empty text", logTag)
+		return
+	}
+	slog.Info(fmt.Sprintf("OnData input text: [%s]", inputText), logTag)
+
+	// prepare memory
+	for len(memoryChan) > 0 {
+		m, ok := <-memoryChan
+		if !ok {
+			break
+		}
+		memory = append(memory, m)
+		if len(memory) > maxMemoryLength {
+			memory = memory[1:]
+		}
+	}
+
+	memory = append(memory, types.Message{
+		Role: types.ConversationRoleUser,
+		Content: []types.ContentBlock{
+			&types.ContentBlockMemberText{
+				Value: inputText,
+			},
+		},
+	})
+
+	if len(memory) > maxMemoryLength {
+		memory = memory[1:]
+	}
+
+	// start goroutine to request and read responses from bedrock
+	wg.Add(1)
+	go func(startTime time.Time, inputText string, memory []types.Message) {
+		defer wg.Done()
+		slog.Info(fmt.Sprintf("getConverseStream for input text: [%s] memory: %v", inputText, memory), logTag)
+
+		// Get result from AI
+		resp, err := p.bedrockLLM.getConverseStream(memory)
+		if err != nil {
+			slog.Error(fmt.Sprintf("getConverseStream for input text: [%s] failed, err: %v", inputText, err), logTag)
+			return
+		}
+		// defer func() {
+		//	if resp != nil { // Close stream object
+		//		resp.Close()
+		//	}
+		// }()
+		slog.Debug(fmt.Sprintf("getConverseStream start to recv for input text: [%s]", inputText), logTag)
+
+		var sentence, fullContent string
+		var firstSentenceSent bool
+		for event := range resp.GetStream().Events() {
+			if startTime.UnixMicro() < outdateTs.Load() { // Check whether to interrupt
+				slog.Info(fmt.Sprintf("GetChatCompletionsStream recv interrupt and flushing for input text: [%s], startTs: %d, outdateTs: %d",
+					inputText, startTime.UnixMicro(), outdateTs.Load()), logTag)
+				break
+			}
+			var content string
+
+			switch v := event.(type) {
+			// case *types.ConverseStreamOutputMemberMessageStart:
+			//	msg.Role = v.Value.Role
+			case *types.ConverseStreamOutputMemberContentBlockDelta:
+				textResponse := v.Value.Delta.(*types.ContentBlockDeltaMemberText)
+				content = textResponse.Value
+
+			case *types.UnknownUnionMember:
+				fmt.Println("unknown tag:", v.Tag)
+			}
+
+			// chatCompletions, err := resp.Recv()
+			// if errors.Is(err, io.EOF) {
+			//	slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s], io.EOF break", inputText), logTag)
+			//	break
+			// }
+
+			// if len(chatCompletions.Choices) > 0 && chatCompletions.Choices[0].Delta.Content != "" {
+			//	content = chatCompletions.Choices[0].Delta.Content
+			// }
+			fullContent += content
+
+			for {
+				// feed content and check whether sentence is available
+				var sentenceIsFinal bool
+				sentence, content, sentenceIsFinal = parseSentence(sentence, content)
+				if len(sentence) == 0 || !sentenceIsFinal {
+					slog.Debug(fmt.Sprintf("sentence %s is empty or not final", sentence), logTag)
+					break
+				}
+				slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] got sentence: [%s]", inputText, sentence), logTag)
+
+				// send sentence
+				outputData, err := rtego.NewData("text_data")
+				if err != nil {
+					slog.Error(fmt.Sprintf("NewData failed, err: %v", err), logTag)
+					break
+				}
+				outputData.SetProperty(dataOutTextDataPropertyText, sentence)
+				outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, false)
+				if err := rte.SendData(outputData); err != nil {
+					slog.Error(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] send sentence [%s] failed, err: %v", inputText, sentence, err), logTag)
+					break
+				} else {
+					slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] sent sentence [%s]", inputText, sentence), logTag)
+				}
+				sentence = ""
+
+				if !firstSentenceSent {
+					firstSentenceSent = true
+					slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] first sentence sent, first_sentence_latency %dms",
+						inputText, time.Since(startTime).Milliseconds()), logTag)
+				}
+			}
+		}
+
+		// remember response as assistant content in memory
+		memoryChan <- types.Message{
+			Role: types.ConversationRoleAssistant,
+			Content: []types.ContentBlock{
+				&types.ContentBlockMemberText{
+					Value: fullContent,
+				},
+			},
+		}
+
+		// send end of segment
+		outputData, _ := rtego.NewData("text_data")
+		outputData.SetProperty(dataOutTextDataPropertyText, sentence)
+		outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true)
+		if err := rte.SendData(outputData); err != nil {
+			slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] send failed, err: %v", inputText, sentence, err), logTag)
+		} else {
+			slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] sent", inputText, sentence), logTag)
+		}
+	}(time.Now(), inputText, append([]types.Message{}, memory...))
}
+
+func init() {
+	slog.Info("init")
+
+	// Register addon
+	rtego.RegisterAddonAsExtension(
+		"bedrock_llm",
+		rtego.NewDefaultExtensionAddon(newBedrockLLMExtension),
+	)
+}
diff --git a/docs/readmes/README-CN.md b/docs/readmes/README-CN.md
index cf4fac5e..6d3dd965 100644
--- a/docs/readmes/README-CN.md
+++ b/docs/readmes/README-CN.md
@@ -113,9 +113,14 @@ npm i && npm run dev
 ```shell
 # rename manifest example
 cp ./agents/manifest.json.example ./agents/manifest.json
+cp ./agents/manifest.json.en.example ./agents/manifest.en.json
+cp ./agents/manifest.json.cn.example ./agents/manifest.cn.json
 
 # pull the docker image with dev tools and mount your current folder as workspace
-docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev agoraio/astra_agents_build
+docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.2
+
+# for windows git bash
+# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.2
 
 # enter docker image
 docker exec -it astra_agents_dev bash
@@ -134,10 +139,19 @@
 export AGORA_APP_ID=
 export AGORA_APP_CERTIFICATE=
 export AZURE_STT_KEY=
 export AZURE_STT_REGION=
+
+# LLM
 export OPENAI_API_KEY=
+export QWEN_API_KEY=
+
+# TTS
+# cosy
+export COSY_TTS_KEY=
+# if you use AZURE_TTS
 export AZURE_TTS_KEY=
 export AZURE_TTS_REGION=
 
+
 # agent is ready to start on port 8080
 make run-server
diff --git a/server/internal/http_server.go b/server/internal/http_server.go
index 3158edd3..924bcfdb 100644
--- a/server/internal/http_server.go
+++
b/server/internal/http_server.go @@ -74,6 +74,8 @@ const ( ManifestJsonFile = "./agents/manifest.json" ManifestJsonFileElevenlabs = "./agents/manifest.elevenlabs.json" + ManifestJsonFileEN = "./agents/manifest.en.json" + ManifestJsonFileCN = "./agents/manifest.cn.json" TTSVendorAzure = "azure" TTSVendorElevenlabs = "elevenlabs" @@ -116,12 +118,17 @@ func NewHttpServer(httpServerConfig *HttpServerConfig) *HttpServer { } func (s *HttpServer) getManifestJsonFile(language string) (manifestJsonFile string) { - ttsVendor := s.getTtsVendor(language) + // ttsVendor := s.getTtsVendor(language) manifestJsonFile = ManifestJsonFile - if ttsVendor == TTSVendorElevenlabs { - manifestJsonFile = ManifestJsonFileElevenlabs + if language == languageEnglish { + manifestJsonFile = ManifestJsonFileEN + } else if language == languageChinese { + manifestJsonFile = ManifestJsonFileCN } + // if ttsVendor == TTSVendorElevenlabs { + // manifestJsonFile = ManifestJsonFileElevenlabs + // } return } diff --git a/server/main.go b/server/main.go index d190387b..f7c6745f 100644 --- a/server/main.go +++ b/server/main.go @@ -46,8 +46,10 @@ func main() { slog.Info("server config", "ttsVendorChinese", httpServerConfig.TTSVendorChinese, "ttsVendorEnglish", httpServerConfig.TTSVendorEnglish, "workersMax", httpServerConfig.WorkersMax, "workerQuitTimeoutSeconds", httpServerConfig.WorkerQuitTimeoutSeconds) - processManifest(internal.ManifestJsonFile) - processManifest(internal.ManifestJsonFileElevenlabs) + // processManifest(internal.ManifestJsonFile) + // processManifest(internal.ManifestJsonFileElevenlabs) + processManifest(internal.ManifestJsonFileEN) + processManifest(internal.ManifestJsonFileCN) httpServer := internal.NewHttpServer(httpServerConfig) httpServer.Start() } @@ -106,11 +108,41 @@ func processManifest(manifestJsonFile string) (err error) { manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="azure_tts").property.azure_subscription_region`, azureTtsRegion) } + awsRegion := os.Getenv("AWS_REGION") + if awsRegion != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.region`, awsRegion) + } + + awsAccessKey := os.Getenv("AWS_ACCESS_KEY_ID") + if awsAccessKey != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.access_key`, awsAccessKey) + } + + awsSecretKey := os.Getenv("AWS_SECRET_ACCESS_KEY") + if awsSecretKey != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.secret_key`, awsSecretKey) + } + + bedrockModel := os.Getenv("AWS_BEDROCK_MODEL") + if bedrockModel != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.model`, bedrockModel) + } + elevenlabsTtsKey := os.Getenv("ELEVENLABS_TTS_KEY") if elevenlabsTtsKey != "" { manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="elevenlabs_tts").property.api_key`, elevenlabsTtsKey) } + cosyTtsKey := os.Getenv("COSY_TTS_KEY") + if cosyTtsKey != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="cosy_tts").property.api_key`, cosyTtsKey) + } + + qwenApiKey := os.Getenv("QWEN_API_KEY") + if qwenApiKey != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="qwen_llm").property.api_key`, qwenApiKey) + } + err = os.WriteFile(manifestJsonFile, []byte(manifestJson), 0644) return }
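Note on the `processManifest` changes above: each `sjson.Set` call uses a gjson-style query path (`#(name=="...")`) to locate a node inside `predefined_graphs` and overwrite one of its properties from an environment variable. Below is a minimal, self-contained sketch of the same pattern, assuming only the `github.com/tidwall/sjson` dependency the server already imports; the manifest snippet and key value are illustrative, not taken from the repo.

```go
package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	// A stripped-down manifest with a single LLM node, for illustration only.
	manifest := `{"predefined_graphs":[{"nodes":[{"name":"qwen_llm","property":{"api_key":""}}]}]}`

	// Select the node whose "name" is qwen_llm and set its api_key,
	// mirroring how processManifest injects QWEN_API_KEY at startup.
	out, err := sjson.Set(manifest,
		`predefined_graphs.0.nodes.#(name=="qwen_llm").property.api_key`, "my-key")
	if err != nil {
		panic(err)
	}
	fmt.Println(out)
}
```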