diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..86b53a61 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,652 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS,examples,tests,out + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns= + +# List of module names for which member attributes should not be checked and +# will not be imported (useful for modules/projects where namespaces are +# manipulated during runtime and thus existing member attributes cannot be +# deduced by static analysis). It supports qualified module names, as well as +# Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. 
+limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Resolve imports to .pyi stubs if available. May reduce no-member messages and +# increase not-an-iterable messages. +prefer-stubs=no + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.10 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +source-roots= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. 
If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +#typealias-rgx= + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + asyncSetUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). 
+max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of positional arguments for function / method. +#max-positional-arguments=5 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )?<?https?://\S+>?$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). 
You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + use-implicit-booleaness-not-comparison-to-string, + use-implicit-booleaness-not-comparison-to-zero, + broad-exception-caught, + logging-fstring-interpolation, + arguments-renamed, + I,C,R, + fixme, + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable= + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + +# Let 'consider-using-join' be raised when the separator to join on would be +# non-empty (resulting in expected fixes of the type: ``"- " + " - +# ".join(items)``) +suggest-join-with-non-empty-separator=yes + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are: text, parseable, colorized, +# json2 (improved json format), json (old json format) and msvs (visual +# studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. 
+score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members=firebase_admin.firestore.*,firestore.* + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. 
+missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/.vscode/settings.json b/.vscode/settings.json index 6c83cb6a..40cdf73e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,4 +5,8 @@ ], "C_Cpp.intelliSenseEngine": "disabled", "editor.formatOnSave": true, + "editor.defaultFormatter": null, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, } \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 0fe1b899..14f1fe2d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ WORKDIR /app COPY . . 
COPY agents/${SESSION_CONTROL_CONF} agents/session_control.conf -RUN task clean && task use AGENT=agents/examples/demo && \ +RUN task clean && task use AGENT=agents/examples/demo && task install-tools && task lint && \ cd agents && ./scripts/package.sh FROM ubuntu:22.04 diff --git a/Taskfile.yml b/Taskfile.yml index bf19cc31..28e11ffd 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -7,6 +7,18 @@ tasks: - task: clean-agents - task: clean-server + lint: + desc: lint-agent + env: + PYTHONPATH: "./agents/ten_packages/system/ten_runtime_python/lib:./agents/ten_packages/system/ten_runtime_python/interface:./agents/ten_packages/system/ten_ai_base/interface" + cmds: + - ./agents/scripts/pylint.sh + + install-tools: + desc: install tools + cmds: + - pip install pylint pylint-exit + build: desc: build cmds: @@ -31,7 +43,7 @@ tasks: desc: run tman dev http server for graph designer dir: ./agents cmds: - - tman dev-server + - tman designer run: desc: run servers diff --git a/agents/examples/default/manifest.json b/agents/examples/default/manifest.json index c87763a2..7a33b295 100644 --- a/agents/examples/default/manifest.json +++ b/agents/examples/default/manifest.json @@ -1,27 +1,27 @@ { "type": "app", "name": "agent_demo", - "version": "0.4.0", + "version": "0.6.0", "dependencies": [ { "type": "system", "name": "ten_runtime_go", - "version": "0.4" + "version": "0.6" }, { "type": "extension", "name": "py_init_extension_cpp", - "version": "0.4" + "version": "0.6" }, { "type": "extension", "name": "agora_rtc", - "version": "=0.9.0-rc1" + "version": "=0.10.1" }, { "type": "extension", "name": "agora_sess_ctrl", - "version": "0.3.0-rc1" + "version": "=0.4.1" }, { "type": "system", @@ -31,12 +31,112 @@ { "type": "extension", "name": "azure_tts", - "version": "=0.6.2" + "version": "=0.7.1" }, { "type": "extension", - "name": "agora_rtm", - "version": "=0.3.0" + "name": "openai_v2v_python", + "version": "=0.1.0" }, + { + "type": "extension", + "name": "message_collector", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "bingsearch_tool_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "openai_chatgpt_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "fish_audio_tts", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "interrupt_detector_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "weatherapi_tool_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "deepgram_asr_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "vision_tool_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "vision_analyze_tool_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "transcribe_asr_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "gemini_llm_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "bedrock_llm_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "polly_tts", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "minimax_tts_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "minimax_v2v_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "cosy_tts_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "elevenlabs_tts_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "dify_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "gemini_v2v_python", + "version": 
"=0.1.0" + }, + { + "type": "extension", + "name": "coze_python_async", + "version": "=0.1.0" } ] } \ No newline at end of file diff --git a/agents/examples/default/property.json b/agents/examples/default/property.json index 02869ba5..5acef1f8 100644 --- a/agents/examples/default/property.json +++ b/agents/examples/default/property.json @@ -95,14 +95,12 @@ ], "connections": [ { - "extension_group": "default", "extension": "agora_rtc", "cmd": [ { "name": "on_user_joined", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -111,7 +109,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -120,7 +117,6 @@ "name": "on_connection_failure", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -131,7 +127,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "stt", "extension": "stt" } ] @@ -139,18 +134,15 @@ ] }, { - "extension_group": "stt", "extension": "stt", "data": [ { "name": "text_data", "dest": [ { - "extension_group": "default", "extension": "interrupt_detector" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -158,14 +150,12 @@ ] }, { - "extension_group": "chatgpt", "extension": "llm", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "tts", "extension": "tts" } ] @@ -174,7 +164,6 @@ "name": "tool_call", "dest": [ { - "extension_group": "default", "extension": "weatherapi_tool_python" } ] @@ -185,11 +174,9 @@ "name": "text_data", "dest": [ { - "extension_group": "tts", "extension": "tts" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -197,14 +184,12 @@ ] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -212,14 +197,12 @@ ] }, { - "extension_group": "tts", "extension": "tts", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -230,7 +213,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -238,14 +220,12 @@ ] }, { - "extension_group": "default", "extension": "interrupt_detector", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -256,7 +236,6 @@ "name": "text_data", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -264,14 +243,12 @@ ] }, { - "extension_group": "default", "extension": "weatherapi_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -362,14 +339,12 @@ ], "connections": [ { - "extension_group": "default", "extension": "agora_rtc", "cmd": [ { "name": "on_user_joined", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -378,7 +353,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -387,7 +361,6 @@ "name": "on_connection_failure", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -398,11 +371,9 @@ "name": "text_data", "dest": [ { - "extension_group": "default", "extension": "interrupt_detector" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -410,14 +381,12 @@ ] }, { - "extension_group": "chatgpt", "extension": "llm", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "tts", "extension": "tts" } ] @@ -426,7 +395,6 @@ "name": "tool_call", "dest": [ { - "extension_group": "default", "extension": "weatherapi_tool_python" } ] @@ 
-437,11 +405,9 @@ "name": "text_data", "dest": [ { - "extension_group": "tts", "extension": "tts" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -449,14 +415,12 @@ ] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -464,14 +428,12 @@ ] }, { - "extension_group": "tts", "extension": "tts", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -482,7 +444,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -490,14 +451,12 @@ ] }, { - "extension_group": "default", "extension": "interrupt_detector", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -508,7 +467,6 @@ "name": "text_data", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -516,14 +474,12 @@ ] }, { - "extension_group": "default", "extension": "weatherapi_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -589,14 +545,12 @@ ], "connections": [ { - "extension_group": "rtc", "extension": "agora_rtc", "audio_frame": [ { "name": "pcm_frame", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -607,7 +561,6 @@ "name": "on_user_joined", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -616,7 +569,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -625,7 +577,6 @@ "name": "on_connection_failure", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -633,14 +584,12 @@ ] }, { - "extension_group": "llm", "extension": "v2v", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -649,7 +598,6 @@ "name": "tool_call", "dest": [ { - "extension_group": "default", "extension": "bingsearch_tool_python" } ] @@ -660,7 +608,6 @@ "name": "text_data", "dest": [ { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -671,7 +618,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -679,14 +625,12 @@ ] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -694,14 +638,12 @@ ] }, { - "extension_group": "default", "extension": "bingsearch_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] diff --git a/agents/examples/demo/manifest.json b/agents/examples/demo/manifest.json index c87763a2..01159a39 100644 --- a/agents/examples/demo/manifest.json +++ b/agents/examples/demo/manifest.json @@ -1,27 +1,27 @@ { "type": "app", "name": "agent_demo", - "version": "0.4.0", + "version": "0.6.0", "dependencies": [ { "type": "system", "name": "ten_runtime_go", - "version": "0.4" + "version": "0.6" }, { "type": "extension", "name": "py_init_extension_cpp", - "version": "0.4" + "version": "0.6" }, { "type": "extension", "name": "agora_rtc", - "version": "=0.9.0-rc1" + "version": "=0.10.1" }, { "type": "extension", "name": "agora_sess_ctrl", - "version": "0.3.0-rc1" + "version": "=0.4.1" }, { "type": "system", @@ -31,12 +31,62 @@ { "type": "extension", "name": "azure_tts", - "version": "=0.6.2" + "version": "=0.7.1" }, { "type": "extension", - "name": "agora_rtm", - "version": "=0.3.0" + "name": 
"dify_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "gemini_v2v_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "openai_chatgpt_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "bingsearch_tool_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "vision_tool_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "weatherapi_tool_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "interrupt_detector_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "openai_v2v_python", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "message_collector", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "coze_python_async", + "version": "=0.1.0" + }, + { + "type": "extension", + "name": "fish_audio_tts", + "version": "=0.1.0" } ] } \ No newline at end of file diff --git a/agents/examples/demo/property.json b/agents/examples/demo/property.json index 11e003a5..a844d1fe 100644 --- a/agents/examples/demo/property.json +++ b/agents/examples/demo/property.json @@ -99,14 +99,12 @@ ], "connections": [ { - "extension_group": "default", "extension": "agora_rtc", "cmd": [ { "name": "on_user_joined", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -115,7 +113,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -124,7 +121,6 @@ "name": "on_connection_failure", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -135,11 +131,9 @@ "name": "text_data", "dest": [ { - "extension_group": "default", "extension": "interrupt_detector" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -150,7 +144,6 @@ "name": "video_frame", "dest": [ { - "extension_group": "default", "extension": "vision_tool_python" } ] @@ -158,14 +151,12 @@ ] }, { - "extension_group": "chatgpt", "extension": "llm", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "tts", "extension": "tts" } ] @@ -174,15 +165,12 @@ "name": "tool_call", "dest": [ { - "extension_group": "default", "extension": "weatherapi_tool_python" }, { - "extension_group": "default", "extension": "vision_tool_python" }, { - "extension_group": "default", "extension": "bingsearch_tool_python" } ] @@ -193,11 +181,9 @@ "name": "text_data", "dest": [ { - "extension_group": "tts", "extension": "tts" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -205,14 +191,12 @@ ] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -220,14 +204,12 @@ ] }, { - "extension_group": "tts", "extension": "tts", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -238,7 +220,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -246,14 +227,12 @@ ] }, { - "extension_group": "default", "extension": "interrupt_detector", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -264,7 +243,6 @@ "name": "text_data", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -272,14 +250,12 @@ ] }, { - "extension_group": "default", "extension": "weatherapi_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -287,14 +263,12 @@ ] }, { - 
"extension_group": "default", "extension": "vision_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -302,14 +276,12 @@ ] }, { - "extension_group": "default", "extension": "bingsearch_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -384,14 +356,12 @@ ], "connections": [ { - "extension_group": "rtc", "extension": "agora_rtc", "cmd": [ { "name": "on_user_joined", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -400,7 +370,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -409,7 +378,6 @@ "name": "on_connection_failure", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -420,7 +388,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -428,14 +395,12 @@ ] }, { - "extension_group": "llm", "extension": "v2v", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -444,11 +409,9 @@ "name": "tool_call", "dest": [ { - "extension_group": "default", "extension": "bingsearch_tool_python" }, { - "extension_group": "default", "extension": "weatherapi_tool_python" } ] @@ -459,7 +422,6 @@ "name": "text_data", "dest": [ { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -470,7 +432,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -478,14 +439,12 @@ ] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -493,14 +452,12 @@ ] }, { - "extension_group": "default", "extension": "bingsearch_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -508,14 +465,12 @@ ] }, { - "extension_group": "default", "extension": "weatherapi_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -610,14 +565,12 @@ ], "connections": [ { - "extension_group": "rtc", "extension": "agora_rtc", "data": [ { "name": "text_data", "dest": [ { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -628,7 +581,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -636,14 +588,12 @@ ] }, { - "extension_group": "tools", "extension": "weatherapi_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -651,14 +601,12 @@ ] }, { - "extension_group": "tools", "extension": "bingsearch_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -666,14 +614,12 @@ ] }, { - "extension_group": "llm", "extension": "v2v", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "tts", "extension": "tts" } ] @@ -682,11 +628,9 @@ "name": "tool_call", "dest": [ { - "extension_group": "tools", "extension": "weatherapi_tool_python" }, { - "extension_group": "tools", "extension": "bingsearch_tool_python" } ] @@ -695,7 +639,6 @@ "name": "on_user_joined", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -704,7 +647,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -715,11 +657,9 @@ "name": "text_data", "dest": [ { - "extension_group": "transcriber", "extension": "message_collector" }, { - "extension_group": 
"tts", "extension": "tts" } ] @@ -727,14 +667,12 @@ ] }, { - "extension_group": "tts", "extension": "tts", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -745,7 +683,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -753,14 +690,12 @@ ] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -798,7 +733,7 @@ { "type": "extension", "name": "interrupt_detector", - "addon": "interrupt_detector", + "addon": "interrupt_detector_python", "extension_group": "default" }, { @@ -834,14 +769,12 @@ ], "connections": [ { - "extension_group": "default", "extension": "agora_rtc", "cmd": [ { "name": "on_user_joined", "dest": [ { - "extension_group": "glue", "extension": "coze_python_async" } ] @@ -850,7 +783,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "glue", "extension": "coze_python_async" } ] @@ -861,15 +793,12 @@ "name": "text_data", "dest": [ { - "extension_group": "default", "extension": "interrupt_detector" }, { - "extension_group": "glue", "extension": "coze_python_async" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -877,14 +806,12 @@ ] }, { - "extension_group": "glue", "extension": "coze_python_async", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "tts", "extension": "tts" } ] @@ -895,11 +822,9 @@ "name": "text_data", "dest": [ { - "extension_group": "tts", "extension": "tts" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -907,14 +832,12 @@ ] }, { - "extension_group": "tts", "extension": "tts", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -925,7 +848,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -933,14 +855,12 @@ ] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -948,14 +868,12 @@ ] }, { - "extension_group": "default", "extension": "interrupt_detector", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "glue", "extension": "coze_python_async" } ] @@ -1024,14 +942,12 @@ ], "connections": [ { - "extension_group": "rtc", "extension": "agora_rtc", "cmd": [ { "name": "on_user_joined", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -1040,7 +956,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -1049,7 +964,6 @@ "name": "on_connection_failure", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -1060,7 +974,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -1071,7 +984,6 @@ "name": "video_frame", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -1079,14 +991,12 @@ ] }, { - "extension_group": "llm", "extension": "v2v", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -1095,7 +1005,6 @@ "name": "tool_call", "dest": [ { - "extension_group": "default", "extension": "weatherapi_tool_python" } ] @@ -1106,7 +1015,6 @@ "name": "text_data", "dest": [ { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -1117,7 +1025,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -1125,14 +1032,12 @@ 
] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "rtc", "extension": "agora_rtc" } ] @@ -1140,14 +1045,12 @@ ] }, { - "extension_group": "default", "extension": "weatherapi_tool_python", "cmd": [ { "name": "tool_register", "dest": [ { - "extension_group": "llm", "extension": "v2v" } ] @@ -1222,14 +1125,12 @@ ], "connections": [ { - "extension_group": "default", "extension": "agora_rtc", "cmd": [ { "name": "on_user_joined", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -1238,7 +1139,6 @@ "name": "on_user_left", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -1247,7 +1147,6 @@ "name": "on_connection_failure", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -1258,11 +1157,9 @@ "name": "text_data", "dest": [ { - "extension_group": "default", "extension": "interrupt_detector" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -1270,14 +1167,12 @@ ] }, { - "extension_group": "chatgpt", "extension": "llm", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "tts", "extension": "tts" } ] @@ -1288,11 +1183,9 @@ "name": "text_data", "dest": [ { - "extension_group": "tts", "extension": "tts" }, { - "extension_group": "transcriber", "extension": "message_collector" } ] @@ -1300,14 +1193,12 @@ ] }, { - "extension_group": "transcriber", "extension": "message_collector", "data": [ { "name": "data", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -1315,14 +1206,12 @@ ] }, { - "extension_group": "tts", "extension": "tts", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -1333,7 +1222,6 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "default", "extension": "agora_rtc" } ] @@ -1341,14 +1229,12 @@ ] }, { - "extension_group": "default", "extension": "interrupt_detector", "cmd": [ { "name": "flush", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] @@ -1359,7 +1245,6 @@ "name": "text_data", "dest": [ { - "extension_group": "chatgpt", "extension": "llm" } ] diff --git a/agents/examples/experimental/manifest.json b/agents/examples/experimental/manifest.json index 2a16c223..c8fc067c 100644 --- a/agents/examples/experimental/manifest.json +++ b/agents/examples/experimental/manifest.json @@ -1,27 +1,27 @@ { "type": "app", "name": "agent_experimental", - "version": "0.4.0", + "version": "0.6.0", "dependencies": [ { "type": "system", "name": "ten_runtime_go", - "version": "0.4" + "version": "0.6" }, { "type": "extension", "name": "py_init_extension_cpp", - "version": "0.4" + "version": "0.6" }, { "type": "extension", "name": "agora_rtc", - "version": "=0.9.0-rc1" + "version": "=0.10.1" }, { "type": "extension", "name": "agora_sess_ctrl", - "version": "0.3.0-rc1" + "version": "=0.4.1" }, { "type": "system", @@ -31,7 +31,7 @@ { "type": "extension", "name": "azure_tts", - "version": "=0.6.0" + "version": "=0.7.1" }, { "type": "extension", diff --git a/agents/scripts/install_deps_and_build.sh b/agents/scripts/install_deps_and_build.sh index 0a19b753..5e817f23 100755 --- a/agents/scripts/install_deps_and_build.sh +++ b/agents/scripts/install_deps_and_build.sh @@ -41,7 +41,7 @@ build_cxx_extensions() { fi if [[ ! -d $extension/lib ]]; then echo "No output for extension $extension_name." 
- exit 1 + # exit 1 fi mkdir -p $app_dir/ten_packages/extension/$extension_name/lib diff --git a/agents/scripts/pylint.sh b/agents/scripts/pylint.sh new file mode 100755 index 00000000..57f251f3 --- /dev/null +++ b/agents/scripts/pylint.sh @@ -0,0 +1,4 @@ +#!/bin/bash + + +pylint ./agents/ten_packages/extension/. || pylint-exit --warn-fail --error-fail $? \ No newline at end of file diff --git a/agents/ten_packages/bak/litellm_python/manifest.json b/agents/ten_packages/bak/litellm_python/manifest.json index cd76096a..ab382bec 100644 --- a/agents/ten_packages/bak/litellm_python/manifest.json +++ b/agents/ten_packages/bak/litellm_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "litellm_python", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { @@ -79,4 +79,4 @@ } ] } -} +} \ No newline at end of file diff --git a/agents/ten_packages/extension/agora_rtm_wrapper/extension.go b/agents/ten_packages/extension/agora_rtm_wrapper/extension.go index 04b30992..680daae2 100644 --- a/agents/ten_packages/extension/agora_rtm_wrapper/extension.go +++ b/agents/ten_packages/extension/agora_rtm_wrapper/extension.go @@ -12,7 +12,7 @@ package extension import ( "encoding/json" - "log/slog" + "fmt" "strconv" "ten_framework/ten" @@ -46,10 +46,6 @@ type RtcUserSate struct { Reason string `json:"reason"` // reason } -var ( - logTag = slog.String("extension", "AGORA_RTM_WRAPPER_EXTENSION") -) - type agoraRtmWrapperExtension struct { ten.DefaultExtension } @@ -65,14 +61,14 @@ func (p *agoraRtmWrapperExtension) OnData( ) { buf, err := data.GetPropertyBytes("data") if err != nil { - slog.Error("OnData GetProperty data error: " + err.Error()) + tenEnv.LogError("OnData GetProperty data error: " + err.Error()) return } - slog.Info("AGORA_RTM_WRAPPER_EXTENSION OnData: "+string(buf), logTag) + tenEnv.LogInfo("AGORA_RTM_WRAPPER_EXTENSION OnData: " + string(buf)) colllectorMessage := ColllectorMessage{} err = json.Unmarshal(buf, &colllectorMessage) if err != nil { - slog.Error("OnData Unmarshal data error: " + err.Error()) + tenEnv.LogError("OnData Unmarshal data error: " + err.Error()) return } @@ -85,70 +81,70 @@ } jsonBytes, err := json.Marshal(message) if err != nil { - slog.Error("failed to marshal JSON: " + err.Error()) + tenEnv.LogError("failed to marshal JSON: " + err.Error()) return } - slog.Info("AGORA_RTM_WRAPPER_EXTENSION OnData: "+string(jsonBytes), logTag) + tenEnv.LogInfo("AGORA_RTM_WRAPPER_EXTENSION OnData: " + string(jsonBytes)) cmd, _ := ten.NewCmd("publish") err = cmd.SetPropertyBytes("message", jsonBytes) if err != nil { - slog.Error("failed to set property message: " + err.Error()) + tenEnv.LogError("failed to set property message: " + err.Error()) return } - if err := tenEnv.SendCmd(cmd, func(_ ten.TenEnv, result ten.CmdResult) { - slog.Info("AGORA_RTM_WRAPPER_EXTENSION publish result " + result.ToJSON()) + if err := tenEnv.SendCmd(cmd, func(_ ten.TenEnv, result ten.CmdResult, _ error) { status, err := result.GetStatusCode() + tenEnv.LogInfo(fmt.Sprintf("AGORA_RTM_WRAPPER_EXTENSION publish result %d", status)) if status != ten.StatusCodeOk || err != nil { - slog.Error("failed to subscribe ") + tenEnv.LogError("failed to subscribe") } }); err != nil { - slog.Error("failed to send command " + err.Error()) + tenEnv.LogError("failed to send command " + err.Error()) } } func (p *agoraRtmWrapperExtension) OnCmd(tenEnv ten.TenEnv, cmd 
ten.Cmd) { defer func() { if r := recover(); r != nil { - slog.Error("OnCmd panic", "recover", r) + tenEnv.LogError(fmt.Sprintf("OnCmd panic: %v", r)) } cmdResult, err := ten.NewCmdResult(ten.StatusCodeOk) if err != nil { - slog.Error("failed to create cmd result", "err", err) + tenEnv.LogError(fmt.Sprintf("failed to create cmd result: %v", err)) return } - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) }() cmdName, err := cmd.GetName() if err != nil { - slog.Error("failed to get cmd name", "err", err) + tenEnv.LogError(fmt.Sprintf("failed to get cmd name: %v", err)) return } - slog.Info(cmd.ToJSON(), logTag) + tenEnv.LogInfo(fmt.Sprintf("received command: %s", cmdName)) switch cmdName { case "on_user_audio_track_state_changed": // on_user_audio_track_state_changed p.handleUserStateChanged(tenEnv, cmd) default: - slog.Warn("unsupported cmd", "cmd", cmdName) + tenEnv.LogWarn(fmt.Sprintf("unsupported cmd: %s", cmdName)) } } func (p *agoraRtmWrapperExtension) handleUserStateChanged(tenEnv ten.TenEnv, cmd ten.Cmd) { remoteUserID, err := cmd.GetPropertyString("remote_user_id") if err != nil { - slog.Error("failed to get remote_user_id", "err", err) + tenEnv.LogError(fmt.Sprintf("failed to get remote_user_id: %v", err)) return } state, err := cmd.GetPropertyInt32("state") if err != nil { - slog.Error("failed to get state", "err", err) + tenEnv.LogError(fmt.Sprintf("failed to get state: %v", err)) return } reason, err := cmd.GetPropertyInt32("reason") if err != nil { - slog.Error("failed to get reason", "err", err) + tenEnv.LogError(fmt.Sprintf("failed to get reason: %v", err)) return } userState := RtcUserSate{ @@ -158,28 +154,24 @@ func (p *agoraRtmWrapperExtension) handleUserStateChanged(tenEnv ten.TenEnv, cmd } jsonBytes, err := json.Marshal(userState) if err != nil { - slog.Error("failed to marshal JSON: " + err.Error()) + tenEnv.LogError("failed to marshal JSON: " + err.Error()) return } sendCmd, _ := ten.NewCmd("set_presence_state") sendCmd.SetPropertyString("states", string(jsonBytes)) - cmdStr := sendCmd.ToJSON() - slog.Info("AGORA_RTM_WRAPPER_EXTENSION SetRtmPresenceState " + cmdStr) - if err := tenEnv.SendCmd(sendCmd, func(_ ten.TenEnv, result ten.CmdResult) { - slog.Info("AGORA_RTM_WRAPPER_EXTENSION SetRtmPresenceState result " + result.ToJSON()) + tenEnv.LogInfo("AGORA_RTM_WRAPPER_EXTENSION SetRtmPresenceState " + string(jsonBytes)) + if err := tenEnv.SendCmd(sendCmd, func(_ ten.TenEnv, result ten.CmdResult, _ error) { status, err := result.GetStatusCode() + tenEnv.LogInfo(fmt.Sprintf("AGORA_RTM_WRAPPER_EXTENSION SetRtmPresenceState result %d", status)) if status != ten.StatusCodeOk || err != nil { - panic("failed to SetRtmPresenceState ") + panic("failed to SetRtmPresenceState") } }); err != nil { - slog.Error("failed to send command " + err.Error()) + tenEnv.LogError("failed to send command " + err.Error()) } - } func init() { - slog.Info("agora_rtm_wrapper extension init", logTag) - // Register addon ten.RegisterAddonAsExtension( "agora_rtm_wrapper", diff --git a/agents/ten_packages/extension/agora_rtm_wrapper/manifest.json b/agents/ten_packages/extension/agora_rtm_wrapper/manifest.json index 490984f4..deb4ee1b 100644 --- a/agents/ten_packages/extension/agora_rtm_wrapper/manifest.json +++ b/agents/ten_packages/extension/agora_rtm_wrapper/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_go", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/__init__.py 
b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/__init__.py index 1af08bf4..b5d987b0 100644 --- a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/__init__.py +++ b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/__init__.py @@ -1,4 +1 @@ from . import vector_storage_addon -from .log import logger - -logger.info("aliyun_analyticdb_vector_storage extension loaded") diff --git a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/client.py b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/client.py index 43c4f541..48dcdb18 100644 --- a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/client.py +++ b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/client.py @@ -1,9 +1,5 @@ # -*- coding: utf-8 -*- -try: - from .log import logger -except ImportError: - from log import logger import asyncio import threading from typing import Coroutine @@ -16,7 +12,7 @@ # maybe need multiple clients class AliGPDBClient: - def __init__(self, access_key_id, access_key_secret, endpoint): + def __init__(self, ten_env, access_key_id, access_key_secret, endpoint): self.stopEvent = asyncio.Event() self.loop = None self.tasks = asyncio.Queue() @@ -28,6 +24,7 @@ def __init__(self, access_key_id, access_key_secret, endpoint): target=asyncio.run, args=(self.__thread_routine(),) ) + self.ten_env = ten_env # set before the thread starts; __thread_routine reads it self.thread.start() async def stop_thread(self): self.stopEvent.set() @@ -50,7 +47,7 @@ def close(self): self.thread.join() async def __thread_routine(self): - logger.info("client __thread_routine start") + self.ten_env.log_info("client __thread_routine start") self.loop = asyncio.get_running_loop() tasks = set() while not self.stopEvent.is_set(): @@ -68,11 +65,11 @@ async def __thread_routine(self): ) for task in done: if task.exception(): - logger.error(f"task exception: {task.exception()}") + self.ten_env.log_error(f"task exception: {task.exception()}") future.set_exception(task.exception()) else: await asyncio.sleep(0.1) - logger.info("client __thread_routine end") + self.ten_env.log_info("client __thread_routine end") async def submit_task(self, coro: Coroutine) -> Future: future = Future() diff --git a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/log.py b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/log.py deleted file mode 100644 index 0cfa1aaf..00000000 --- a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/log.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -logger = logging.getLogger("aliyun_analyticdb_vector_storage") -logger.setLevel(logging.INFO) - -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" -) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/manifest.json b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/manifest.json index 05cf24ba..3ba20fdc 100644 --- a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/manifest.json +++ b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "aliyun_analyticdb_vector_storage", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git 
a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/model.py b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/model.py index cee402d1..1b3b47f3 100644 --- a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/model.py +++ b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/model.py @@ -1,11 +1,6 @@ # -*- coding: utf-8 -*- from alibabacloud_gpdb20160503 import models as gpdb_20160503_models # type: ignore - -try: - from .log import logger -except ImportError: - from log import logger import time import json from typing import Dict, List, Any, Tuple @@ -13,12 +8,13 @@ class Model: - def __init__(self, region_id, dbinstance_id, client): + def __init__(self, ten_env, region_id, dbinstance_id, client): self.region_id = region_id self.dbinstance_id = dbinstance_id self.client = client self.read_timeout = 10 * 1000 self.connect_timeout = 10 * 1000 + self.ten_env = ten_env def get_client(self): return self.client.get() @@ -37,11 +33,11 @@ def init_vector_database(self, account, account_password) -> None: response = self.get_client().init_vector_database_with_options( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"init_vector_database response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e async def init_vector_database_async(self, account, account_password) -> None: @@ -58,11 +54,11 @@ async def init_vector_database_async(self, account, account_password) -> None: response = await self.get_client().init_vector_database_with_options_async( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"init_vector_database response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e def create_namespace( @@ -81,11 +77,11 @@ def create_namespace( read_timeout=self.read_timeout, connect_timeout=self.connect_timeout ) response = self.get_client().create_namespace_with_options(request, runtime) - logger.debug( + self.ten_env.log_debug( f"create_namespace response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e async def create_namespace_async( @@ -106,11 +102,11 @@ async def create_namespace_async( response = await self.get_client().create_namespace_with_options_async( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"create_namespace response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e def create_collection( @@ -149,11 +145,11 @@ def create_collection( response = self.get_client().create_collection_with_options( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"create_document_collection response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e async def create_collection_async( @@ -192,11 +188,11 @@ async def create_collection_async( response = await self.get_client().create_collection_with_options_async( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"create_document_collection response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e def 
delete_collection(self, namespace, namespace_password, collection) -> None: @@ -214,11 +210,11 @@ def delete_collection(self, namespace, namespace_password, collection) -> None: response = self.get_client().delete_collection_with_options( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"delete_collection response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e async def delete_collection_async( @@ -238,11 +234,11 @@ async def delete_collection_async( response = await self.get_client().delete_collection_with_options_async( request, runtime ) - logger.info( + self.ten_env.log_info( f"delete_collection response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e def upsert_collection_data( @@ -283,11 +279,11 @@ def upsert_collection_data( response = self.get_client().upsert_collection_data_with_options( upsert_collection_data_request, runtime ) - logger.debug( + self.ten_env.log_debug( f"upsert_collection response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e async def upsert_collection_data_async( @@ -330,13 +326,14 @@ async def upsert_collection_data_async( upsert_collection_data_request, runtime ) ) - logger.debug( + self.ten_env.log_debug( f"upsert_collection response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e + # pylint: disable=redefined-builtin def query_collection_data( self, collection, @@ -377,13 +374,13 @@ def query_collection_data( response = self.get_client().query_collection_data_with_options( query_collection_data_request, runtime ) - # logger.info(f"query_collection response code: {response.status_code}, body:{response.body}") - logger.debug(f"query_collection response code: {response.status_code}") + self.ten_env.log_debug(f"query_collection response code: {response.status_code}") return response, None except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return None, e + # pylint: disable=redefined-builtin async def query_collection_data_async( self, collection, @@ -424,10 +421,10 @@ async def query_collection_data_async( response = await self.get_client().query_collection_data_with_options_async( query_collection_data_request, runtime ) - logger.debug(f"query_collection response code: {response.status_code}") + self.ten_env.log_debug(f"query_collection response code: {response.status_code}") return response, None except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return None, e def parse_collection_data( @@ -443,7 +440,9 @@ def parse_collection_data( json_str = json.dumps(results) return json_str except Exception as e: - logger.error(f"parse collection data failed, error: {e}, data: {body.to_map()}") + self.ten_env.log_error( + f"parse collection data failed, error: {e}, data: {body.to_map()}" + ) return "[]" def list_collections(self, namespace, namespace_password) -> Tuple[List[str], Any]: @@ -458,13 +457,13 @@ def list_collections(self, namespace, namespace_password) -> Tuple[List[str], An read_timeout=self.read_timeout, connect_timeout=self.connect_timeout ) response = self.get_client().list_collections_with_options(request, runtime) - logger.debug( 
+ self.ten_env.log_debug( f"list_collections response code: {response.status_code}, body:{response.body}" ) collections = response.body.to_map()["Collections"]["collection"] return collections, None except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return [], e async def list_collections_async( @@ -483,13 +482,13 @@ async def list_collections_async( response = await self.get_client().list_collections_with_options_async( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"list_collections response code: {response.status_code}, body:{response.body}" ) collections = response.body.to_map()["Collections"]["collection"] return collections, None except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return [], e def create_vector_index( @@ -512,11 +511,11 @@ def create_vector_index( response = self.get_client().create_vector_index_with_options( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"create_vector_index response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e async def create_vector_index_async( @@ -539,9 +538,9 @@ async def create_vector_index_async( response = await self.get_client().create_vector_index_with_options_async( request, runtime ) - logger.debug( + self.ten_env.log_debug( f"create_vector_index response code: {response.status_code}, body:{response.body}" ) except Exception as e: - logger.error(f"Error: {e}") + self.ten_env.log_error(f"Error: {e}") return e diff --git a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/vector_storage_addon.py b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/vector_storage_addon.py index 747b091b..aff84dd3 100644 --- a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/vector_storage_addon.py +++ b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/vector_storage_addon.py @@ -8,7 +8,6 @@ @register_addon_as_extension("aliyun_analyticdb_vector_storage") class AliPGDBExtensionAddon(Addon): def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: - from .log import logger from .vector_storage_extension import AliPGDBExtension - logger.info("on_create_instance") + ten.log_info("on_create_instance") ten.on_create_instance_done(AliPGDBExtension(addon_name), context) diff --git a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/vector_storage_extension.py b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/vector_storage_extension.py index 82475f08..81e300e2 100644 --- a/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/vector_storage_extension.py +++ b/agents/ten_packages/extension/aliyun_analyticdb_vector_storage/vector_storage_extension.py @@ -13,8 +13,6 @@ CmdResult, ) -from typing import List -from .log import logger import threading from datetime import datetime @@ -28,7 +26,7 @@ def __init__(self, name): self.access_key_secret = os.environ.get("ALIBABA_CLOUD_ACCESS_KEY_SECRET") self.region_id = os.environ.get("ADBPG_INSTANCE_REGION") self.dbinstance_id = os.environ.get("ADBPG_INSTANCE_ID") - self.endpoint = f"gpdb.aliyuncs.com" + self.endpoint = "gpdb.aliyuncs.com" self.model = None self.account = os.environ.get("ADBPG_ACCOUNT") self.account_password = os.environ.get("ADBPG_ACCOUNT_PASSWORD") @@ -36,7 +34,7 @@ def __init__(self, name): self.namespace_password = os.environ.get("ADBPG_NAMESPACE_PASSWORD") async def 
__thread_routine(self, ten_env: TenEnv): - logger.info("__thread_routine start") + ten_env.log_info("__thread_routine start") self.loop = asyncio.get_running_loop() ten_env.on_start_done() await self.stopEvent.wait() @@ -45,7 +43,7 @@ async def stop_thread(self): self.stopEvent.set() def on_start(self, ten: TenEnv) -> None: - logger.info(f"on_start") + ten.log_info("on_start") self.access_key_id = self.get_property_string( ten, "ALIBABA_CLOUD_ACCESS_KEY_ID", self.access_key_id ) @@ -90,9 +88,9 @@ def on_start(self, ten: TenEnv) -> None: from .model import Model client = AliGPDBClient( - self.access_key_id, self.access_key_secret, self.endpoint + ten, self.access_key_id, self.access_key_secret, self.endpoint ) - self.model = Model(self.region_id, self.dbinstance_id, client) + self.model = Model(ten, self.region_id, self.dbinstance_id, client) self.thread = threading.Thread( target=asyncio.run, args=(self.__thread_routine(ten),) ) @@ -102,7 +100,7 @@ def on_start(self, ten: TenEnv) -> None: return def on_stop(self, ten: TenEnv) -> None: - logger.info("on_stop") + ten.log_info("on_stop") if self.thread is not None and self.thread.is_alive(): asyncio.run_coroutine_threadsafe(self.stop_thread(), self.loop) self.thread.join() @@ -116,7 +114,7 @@ def on_data(self, ten: TenEnv, data: Data) -> None: def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: try: cmd_name = cmd.get_name() - logger.info(f"on_cmd [{cmd_name}]") + ten.log_info(f"on_cmd [{cmd_name}]") if cmd_name == "create_collection": asyncio.run_coroutine_threadsafe( self.async_create_collection(ten, cmd), self.loop @@ -135,7 +133,7 @@ def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: ) else: ten.return_result(CmdResult.create(StatusCode.ERROR), cmd) - except Exception as e: + except Exception: ten.return_result(CmdResult.create(StatusCode.ERROR), cmd) async def async_create_collection(self, ten: TenEnv, cmd: Cmd): @@ -144,7 +142,7 @@ async def async_create_collection(self, ten: TenEnv, cmd: Cmd): try: dimension = cmd.get_property_int("dimension") except Exception as e: - logger.warning(f"Error: {e}") + ten.log_warn(f"Error: {e}") err = await self.model.create_collection_async( self.account, self.account_password, self.namespace, collection @@ -172,14 +170,8 @@ async def async_upsert_vector(self, ten: TenEnv, cmd: Cmd): err = await self.model.upsert_collection_data_async( collection, self.namespace, self.namespace_password, rows ) - logger.info( - "upsert_vector finished for file {}, collection {}, rows len {}, err {}, cost {}ms".format( - file, - collection, - len(rows), - err, - int((datetime.now() - start_time).total_seconds() * 1000), - ) + ten.log_info( + f"upsert_vector finished for file {file}, collection {collection}, rows len {len(rows)}, err {err}, cost {int((datetime.now() - start_time).total_seconds() * 1000)}ms" ) if err is None: ten.return_result(CmdResult.create(StatusCode.OK), cmd) @@ -195,13 +187,8 @@ async def async_query_vector(self, ten: TenEnv, cmd: Cmd): response, error = await self.model.query_collection_data_async( collection, self.namespace, self.namespace_password, vector, top_k=top_k ) - logger.info( - "query_vector finished for collection {}, embedding len {}, err {}, cost {}ms".format( - collection, - len(embedding), - error, - int((datetime.now() - start_time).total_seconds() * 1000), - ) + ten.log_info( + f"query_vector finished for collection {collection}, embedding len {len(embedding)}, err {error}, cost {int((datetime.now() - start_time).total_seconds() * 1000)}ms" ) if error: @@ -214,6 +201,7 @@ async def 
async_query_vector(self, ten: TenEnv, cmd: Cmd): async def async_delete_collection(self, ten: TenEnv, cmd: Cmd): collection = cmd.get_property_string("collection_name") + # pylint: disable=too-many-function-args err = await self.model.delete_collection_async( self.account, self.account_password, self.namespace, collection ) @@ -226,5 +214,5 @@ def get_property_string(self, ten: TenEnv, key: str, default: str) -> str: try: return ten.get_property_string(key.lower()) except Exception as e: - logger.error(f"Error: {e}") + ten.log_error(f"Error: {e}") return default diff --git a/agents/ten_packages/extension/aliyun_text_embedding/__init__.py b/agents/ten_packages/extension/aliyun_text_embedding/__init__.py index b06c946a..61530b41 100644 --- a/agents/ten_packages/extension/aliyun_text_embedding/__init__.py +++ b/agents/ten_packages/extension/aliyun_text_embedding/__init__.py @@ -1,4 +1 @@ from . import embedding_addon -from .log import logger - -logger.info("aliyun_text_embedding extension loaded") diff --git a/agents/ten_packages/extension/aliyun_text_embedding/embedding_addon.py b/agents/ten_packages/extension/aliyun_text_embedding/embedding_addon.py index 8c085efc..8a02bbc6 100644 --- a/agents/ten_packages/extension/aliyun_text_embedding/embedding_addon.py +++ b/agents/ten_packages/extension/aliyun_text_embedding/embedding_addon.py @@ -8,7 +8,6 @@ @register_addon_as_extension("aliyun_text_embedding") class EmbeddingExtensionAddon(Addon): def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: - from .log import logger from .embedding_extension import EmbeddingExtension - logger.info("on_create_instance") + ten.log_info("on_create_instance") ten.on_create_instance_done(EmbeddingExtension(addon_name), context) diff --git a/agents/ten_packages/extension/aliyun_text_embedding/embedding_extension.py b/agents/ten_packages/extension/aliyun_text_embedding/embedding_extension.py index 27653459..fa2f24a3 100644 --- a/agents/ten_packages/extension/aliyun_text_embedding/embedding_extension.py +++ b/agents/ten_packages/extension/aliyun_text_embedding/embedding_extension.py @@ -9,7 +9,6 @@ import json from typing import Generator, List from http import HTTPStatus -from .log import logger import threading, queue from datetime import datetime @@ -34,21 +33,20 @@ def __init__(self, name: str): self.queue = queue.Queue() self.threads = [] - # TODO: workaround to speed up the embedding process, + # workaround to speed up the embedding process, # should be replace by https://help.aliyun.com/zh/model-studio/developer-reference/text-embedding-batch-api?spm=a2c4g.11186623.0.0.24cb7453KSjdhC # once v3 models supported self.parallel = 10 def on_start(self, ten: TenEnv) -> None: - logger.info("on_start") + ten.log_info("on_start") self.api_key = self.get_property_string(ten, "api_key", self.api_key) self.model = self.get_property_string(ten, "model", self.model) - # lazy import packages which requires long time to load - global dashscope + global dashscope # pylint: disable=global-statement import dashscope - + dashscope.api_key = self.api_key for i in range(self.parallel): @@ -59,7 +57,7 @@ def on_start(self, ten: TenEnv) -> None: ten.on_start_done() def async_handler(self, index: int, ten: TenEnv): - logger.info("async_handler {} statend".format(index)) + ten.log_info(f"async_handler {index} started") while not self.stop: cmd = self.queue.get() @@ -68,32 +66,38 @@ cmd_name = cmd.get_name() start_time = datetime.now() - logger.info( -
"async_handler {} processing cmd {}".format(index, cmd_name)) - + ten.log_info(f"async_handler {index} processing cmd {cmd_name}") + if cmd_name == CMD_EMBED: - cmd_result = self.call_with_str(cmd.get_property_string("input")) + cmd_result = self.call_with_str(cmd.get_property_string("input"), ten) ten.return_result(cmd_result, cmd) elif cmd_name == CMD_EMBED_BATCH: - list = json.loads(cmd.get_property_to_json("inputs")) - cmd_result = self.call_with_strs(list) + inputs_list = json.loads(cmd.get_property_to_json("inputs")) + cmd_result = self.call_with_strs(inputs_list, ten) ten.return_result(cmd_result, cmd) else: - logger.warning("unknown cmd {}".format(cmd_name)) - - logger.info( - "async_handler {} finished processing cmd {}, cost {}ms".format(index, cmd_name, int((datetime.now() - start_time).total_seconds() * 1000))) + ten.log_warn("unknown cmd {cmd_name}") + + ten.log_info( + f"async_handler {index} finished processing cmd {cmd_name}, cost {int((datetime.now() - start_time).total_seconds() * 1000)}ms" + ) - logger.info("async_handler {} stopped".format(index)) + ten.log_info(f"async_handler {index} stopped") - def call_with_str(self, message: str) -> CmdResult: + def call_with_str(self, message: str, ten: TenEnv) -> CmdResult: start_time = datetime.now() + # pylint: disable=undefined-variable response = dashscope.TextEmbedding.call(model=self.model, input=message) - logger.info("embedding call finished for input [{}], status_code {}, cost {}ms".format(message, response.status_code, int((datetime.now() - start_time).total_seconds() * 1000))) + ten.log_info( + f"embedding call finished for input [{message}], status_code {response.status_code}, cost {int((datetime.now() - start_time).total_seconds() * 1000)}ms" + ) if response.status_code == HTTPStatus.OK: cmd_result = CmdResult.create(StatusCode.OK) - cmd_result.set_property_from_json(FIELD_KEY_EMBEDDING, json.dumps(response.output["embeddings"][0]["embedding"])) + cmd_result.set_property_from_json( + FIELD_KEY_EMBEDDING, + json.dumps(response.output["embeddings"][0]["embedding"]), + ) return cmd_result else: cmd_result = CmdResult.create(StatusCode.ERROR) @@ -107,13 +111,14 @@ def batched( for i in range(0, len(inputs), batch_size): yield inputs[i : i + batch_size] - def call_with_strs(self, messages: List[str]) -> CmdResult: + def call_with_strs(self, messages: List[str], ten: TenEnv) -> CmdResult: start_time = datetime.now() result = None # merge the results. 
batch_counter = 0 for batch in self.batched(messages): + # pylint: disable=undefined-variable response = dashscope.TextEmbedding.call(model=self.model, input=batch) - # logger.info("%s Received %s", batch, response) + # ten.log_info("%s Received %s", batch, response) if response.status_code == HTTPStatus.OK: if result is None: result = response.output @@ -122,25 +127,29 @@ emb["text_index"] += batch_counter result["embeddings"].append(emb) else: - logger.error("call %s failed, errmsg: %s", batch, response) + ten.log_error(f"call {batch} failed, errmsg: {response}") batch_counter += len(batch) - logger.info("embedding call finished for inputs len {}, batch_counter {}, results len {}, cost {}ms ".format(len(messages), batch_counter, len(result["embeddings"]), int((datetime.now() - start_time).total_seconds() * 1000))) + ten.log_info( + f"embedding call finished for inputs len {len(messages)}, batch_counter {batch_counter}, results len {len(result['embeddings'])}, cost {int((datetime.now() - start_time).total_seconds() * 1000)}ms " + ) if result is not None: cmd_result = CmdResult.create(StatusCode.OK) - # TODO: too slow `set_property_to_json`, so use `set_property_string` at the moment as workaround + # too slow `set_property_to_json`, so use `set_property_string` at the moment as workaround # will be replaced once `set_property_to_json` improved - cmd_result.set_property_string(FIELD_KEY_EMBEDDINGS, json.dumps(result["embeddings"])) + cmd_result.set_property_string( + FIELD_KEY_EMBEDDINGS, json.dumps(result["embeddings"]) + ) return cmd_result else: cmd_result = CmdResult.create(StatusCode.ERROR) cmd_result.set_property_string(FIELD_KEY_MESSAGE, "All batch failed") - logger.error("All batch failed") + ten.log_error("All batch failed") return cmd_result def on_stop(self, ten: TenEnv) -> None: - logger.info("on_stop") + ten.log_info("on_stop") self.stop = True # clear queue while not self.queue.empty(): @@ -158,23 +167,21 @@ def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: cmd_name = cmd.get_name() if cmd_name in [CMD_EMBED, CMD_EMBED_BATCH]: - """ - // embed - { - "name": "embed", - "input": "hello" - } - - // embed_batch - { - "name": "embed_batch", - "inputs": ["hello", ...] - } - """ + # // embed + # { + # "name": "embed", + # "input": "hello" + # } + + # // embed_batch + # { + # "name": "embed_batch", + # "inputs": ["hello", ...]
+ # } self.queue.put(cmd) else: - logger.warning("unknown cmd {}".format(cmd_name)) + ten.log_warn(f"unknown cmd {cmd_name}") cmd_result = CmdResult.create(StatusCode.ERROR) ten.return_result(cmd_result, cmd) @@ -182,5 +189,5 @@ def get_property_string(self, ten: TenEnv, key, default): try: return ten.get_property_string(key) except Exception as e: - logger.warning(f"err: {e}") + ten.log_warn(f"err: {e}") return default diff --git a/agents/ten_packages/extension/aliyun_text_embedding/log.py b/agents/ten_packages/extension/aliyun_text_embedding/log.py deleted file mode 100644 index 2e90975a..00000000 --- a/agents/ten_packages/extension/aliyun_text_embedding/log.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -logger = logging.getLogger("aliyun_text_embedding") -logger.setLevel(logging.INFO) - -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" -) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/aliyun_text_embedding/manifest.json b/agents/ten_packages/extension/aliyun_text_embedding/manifest.json index 86b8de32..c7cba56f 100644 --- a/agents/ten_packages/extension/aliyun_text_embedding/manifest.json +++ b/agents/ten_packages/extension/aliyun_text_embedding/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "aliyun_text_embedding", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/bedrock_llm_python/__init__.py b/agents/ten_packages/extension/bedrock_llm_python/__init__.py index 4e39b9dc..31694384 100644 --- a/agents/ten_packages/extension/bedrock_llm_python/__init__.py +++ b/agents/ten_packages/extension/bedrock_llm_python/__init__.py @@ -1,4 +1 @@ from . import bedrock_llm_extension -from .log import logger - -logger.info("bedrock_llm_python extension loaded") diff --git a/agents/ten_packages/extension/bedrock_llm_python/bedrock_llm.py b/agents/ten_packages/extension/bedrock_llm_python/bedrock_llm.py index c833f7b7..92232b8d 100644 --- a/agents/ten_packages/extension/bedrock_llm_python/bedrock_llm.py +++ b/agents/ten_packages/extension/bedrock_llm_python/bedrock_llm.py @@ -1,16 +1,19 @@ import boto3 -from .log import logger +from ten import TenEnv + class BedrockLLMConfig: - def __init__(self, - region: str, - access_key: str, - secret_key: str, - model: str, - prompt: str, - top_p: float, - temperature: float, - max_tokens: int): + def __init__( + self, + region: str, + access_key: str, + secret_key: str, + model: str, + prompt: str, + top_p: float, + temperature: float, + max_tokens: int, + ): self.region = region self.access_key = access_key self.secret_key = secret_key @@ -26,7 +29,7 @@ def default_config(cls): region="us-east-1", access_key="", secret_key="", - model="anthropic.claude-3-5-sonnet-20240620-v1:0", # Defaults to Claude 3.5, supported model list: https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html + model="anthropic.claude-3-5-sonnet-20240620-v1:0", # Defaults to Claude 3.5, supported model list: https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html # system prompt prompt="You are a voice assistant who talks in a conversational way and can chat with me like my friends. 
I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. When giving me a list of options, express the options in a narrative way instead of bullet points.", top_p=1.0, @@ -34,21 +37,29 @@ def default_config(cls): max_tokens=512, ) + class BedrockLLM: client = None - def __init__(self, config: BedrockLLMConfig): + + def __init__(self, config: BedrockLLMConfig, ten_env: TenEnv): self.config = config + self.ten_env = ten_env if config.access_key and config.secret_key: - logger.info(f"BedrockLLM initialized with access key: {config.access_key}") + self.ten_env.log_info(f"BedrockLLM initialized with access key: {config.access_key}") - self.client = boto3.client(service_name='bedrock-runtime', - region_name=config.region, - aws_access_key_id=config.access_key, - aws_secret_access_key=config.secret_key) + self.client = boto3.client( + service_name="bedrock-runtime", + region_name=config.region, + aws_access_key_id=config.access_key, + aws_secret_access_key=config.secret_key, + ) else: - logger.info(f"BedrockLLM initialized without access key, using default credentials provider chain.") - self.client = boto3.client(service_name='bedrock-runtime', region_name=config.region) + self.ten_env.log_info( + "BedrockLLM initialized without access key, using default credentials provider chain.") + self.client = boto3.client( + service_name="bedrock-runtime", region_name=config.region + ) def get_converse_stream(self, messages): bedrock_req_params = { @@ -64,12 +75,10 @@ def get_converse_stream(self, messages): } if self.config.prompt: - bedrock_req_params['system'] = [ - {'text': self.config.prompt} - ] + bedrock_req_params["system"] = [{"text": self.config.prompt}] try: response = self.client.converse_stream(**bedrock_req_params) return response except Exception as e: - raise Exception(f"GetConverseStream failed, err: {e}") \ No newline at end of file + raise RuntimeError(f"GetConverseStream failed, err: {e}") from e diff --git a/agents/ten_packages/extension/bedrock_llm_python/bedrock_llm_extension.py b/agents/ten_packages/extension/bedrock_llm_python/bedrock_llm_extension.py index 5e7918a8..80aced02 100644 --- a/agents/ten_packages/extension/bedrock_llm_python/bedrock_llm_extension.py +++ b/agents/ten_packages/extension/bedrock_llm_python/bedrock_llm_extension.py @@ -11,7 +11,6 @@ StatusCode, CmdResult, ) -from .log import logger CMD_IN_FLUSH = "flush" @@ -69,8 +68,8 @@ class BedrockLLMExtension(Extension): outdate_ts = 0 bedrock_llm = None - def on_start(self, ten: TenEnv) -> None: - logger.info("BedrockLLMExtension on_start") + def on_start(self, ten_env: TenEnv) -> None: + ten_env.log_info("BedrockLLMExtension on_start") # Prepare configuration bedrock_llm_config = BedrockLLMConfig.default_config() @@ -82,57 +81,59 @@ def on_start(self, ten: TenEnv) -> None: PROPERTY_PROMPT, ]: try: - value = ten.get_property_string(optional_str_param).strip() + value = ten_env.get_property_string(optional_str_param).strip() if value: bedrock_llm_config.__setattr__(optional_str_param, value) except Exception as err: - logger.debug( + ten_env.log_debug( f"GetProperty optional {optional_str_param} failed, err: {err}. 
Using default value: {bedrock_llm_config.__getattribute__(optional_str_param)}" ) for optional_float_param in [PROPERTY_TEMPERATURE, PROPERTY_TOP_P]: try: - value = ten.get_property_float(optional_float_param) + value = ten_env.get_property_float(optional_float_param) if value: bedrock_llm_config.__setattr__(optional_float_param, value) except Exception as err: - logger.debug( + ten_env.log_debug( f"GetProperty optional {optional_float_param} failed, err: {err}. Using default value: {bedrock_llm_config.__getattribute__(optional_float_param)}" ) try: - max_tokens = ten.get_property_int(PROPERTY_MAX_TOKENS) + max_tokens = ten_env.get_property_int(PROPERTY_MAX_TOKENS) if max_tokens > 0: bedrock_llm_config.max_tokens = int(max_tokens) except Exception as err: - logger.debug( + ten_env.log_debug( f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}. Using default value: {bedrock_llm_config.max_tokens}" ) try: - greeting = ten.get_property_string(PROPERTY_GREETING) + greeting = ten_env.get_property_string(PROPERTY_GREETING) except Exception as err: - logger.debug( + ten_env.log_debug( f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}." ) try: - prop_max_memory_length = ten.get_property_int(PROPERTY_MAX_MEMORY_LENGTH) + prop_max_memory_length = ten_env.get_property_int( + PROPERTY_MAX_MEMORY_LENGTH + ) if prop_max_memory_length > 0: self.max_memory_length = int(prop_max_memory_length) except Exception as err: - logger.debug( + ten_env.log_debug( f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}." ) # Create bedrockLLM instance try: - self.bedrock_llm = BedrockLLM(bedrock_llm_config) - logger.info( + self.bedrock_llm = BedrockLLM(bedrock_llm_config, ten_env) + ten_env.log_info( f"newBedrockLLM succeed with max_tokens: {bedrock_llm_config.max_tokens}, model: {bedrock_llm_config.model}" ) except Exception as err: - logger.exception(f"newBedrockLLM failed, err: {err}") + ten_env.log_error(f"newBedrockLLM failed, err: {err}") # Send greeting if available if greeting: @@ -144,40 +145,40 @@ def on_start(self, ten: TenEnv) -> None: output_data.set_property_bool( DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True ) - ten.send_data(output_data) - logger.info(f"greeting [{greeting}] sent") + ten_env.send_data(output_data) + ten_env.log_info(f"greeting [{greeting}] sent") except Exception as err: - logger.info(f"greeting [{greeting}] send failed, err: {err}") - ten.on_start_done() + ten_env.log_info(f"greeting [{greeting}] send failed, err: {err}") + ten_env.on_start_done() - def on_stop(self, ten: TenEnv) -> None: - logger.info("BedrockLLMExtension on_stop") - ten.on_stop_done() + def on_stop(self, ten_env: TenEnv) -> None: + ten_env.log_info("BedrockLLMExtension on_stop") + ten_env.on_stop_done() - def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: - logger.info("BedrockLLMExtension on_cmd") + def on_cmd(self, ten_env: TenEnv, cmd: Cmd) -> None: + ten_env.log_info("BedrockLLMExtension on_cmd") cmd_json = cmd.to_json() - logger.info("BedrockLLMExtension on_cmd json: " + cmd_json) + ten_env.log_info(f"BedrockLLMExtension on_cmd json: {cmd_json}") cmd_name = cmd.get_name() if cmd_name == CMD_IN_FLUSH: self.outdate_ts = get_current_time() cmd_out = Cmd.create(CMD_OUT_FLUSH) - ten.send_cmd(cmd_out, None) - logger.info(f"BedrockLLMExtension on_cmd sent flush") + ten_env.send_cmd(cmd_out, None) + ten_env.log_info("BedrockLLMExtension on_cmd sent flush") else: - logger.info(f"BedrockLLMExtension on_cmd unknown cmd: {cmd_name}") + ten_env.log_info(f"BedrockLLMExtension 
on_cmd unknown cmd: {cmd_name}") cmd_result = CmdResult.create(StatusCode.ERROR) cmd_result.set_property_string("detail", "unknown cmd") - ten.return_result(cmd_result, cmd) + ten_env.return_result(cmd_result, cmd) return cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") - ten.return_result(cmd_result, cmd) + ten_env.return_result(cmd_result, cmd) - def on_data(self, ten: TenEnv, data: Data) -> None: + def on_data(self, ten_env: TenEnv, data: Data) -> None: """ on_data receives data from ten graph. current supported data: @@ -185,16 +186,16 @@ example: {name: text_data, properties: {text: "hello"}} """ - logger.info(f"BedrockLLMExtension on_data") + ten_env.log_info("BedrockLLMExtension on_data") # Assume 'data' is an object from which we can get properties try: is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) if not is_final: - logger.info("ignore non-final input") + ten_env.log_info("ignore non-final input") return except Exception as err: - logger.info( + ten_env.log_info( f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" ) return @@ -203,11 +204,11 @@ try: input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) if not input_text: - logger.info("ignore empty text") + ten_env.log_info("ignore empty text") return - logger.info(f"OnData input text: [{input_text}]") + ten_env.log_info(f"OnData input text: [{input_text}]") except Exception as err: - logger.info( + ten_env.log_info( f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" ) return @@ -215,12 +216,12 @@ # Prepare memory. A conversation must alternate between user and assistant roles while len(self.memory): if len(self.memory) > self.max_memory_length: - logger.debug( + ten_env.log_debug( f"pop out first message, reason: memory length limit: `{self.memory[0]}`" ) self.memory.pop(0) elif self.memory[0]["role"] == "assistant": - logger.debug( + ten_env.log_debug( f"pop out first message, reason: messages can not start with assistant: `{self.memory[0]}`" ) self.memory.pop(0) @@ -229,8 +230,8 @@ if len(self.memory) and self.memory[-1]["role"] == "user": # if last user input got empty response, append current user input.
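# Editor's note: the alternation rule enforced above (and the merge of
# consecutive user inputs just below) reflects the Converse API's constraint
# that a message history must start with a user turn and strictly alternate
# user/assistant roles.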
- logger.debug( - f"found last message with role `user`, will append this input into last user input" + ten_env.log_debug( + "found last message with role `user`, will append this input into last user input" ) self.memory[-1]["content"].append({"text": input_text}) else: @@ -238,14 +239,14 @@ def converse_stream_worker(start_time, input_text, memory): try: - logger.info( + ten_env.log_info( f"GetConverseStream for input text: [{input_text}] memory: {memory}" ) # Get result from Bedrock resp = self.bedrock_llm.get_converse_stream(memory) if resp is None or resp.get("stream") is None: - logger.info( + ten_env.log_info( f"GetConverseStream for input text: [{input_text}] failed" ) return @@ -258,7 +259,7 @@ for event in stream: # allow 100ms buffer time, in case interruptor's flush cmd comes just after on_data event if (start_time + 100_000) < self.outdate_ts: - logger.info( + ten_env.log_info( f"GetConverseStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}, delta > 100ms" ) break @@ -274,7 +275,7 @@ or "throttlingException" in event or "validationException" in event ): - logger.error(f"GetConverseStream Error occured: {event}") + ten_env.log_error(f"GetConverseStream Error occurred: {event}") break else: # ignore other events @@ -287,9 +288,9 @@ sentence, content ) if not sentence or not sentence_is_final: - logger.info(f"sentence [{sentence}] is empty or not final") + ten_env.log_info(f"sentence [{sentence}] is empty or not final") break - logger.info( + ten_env.log_info( f"GetConverseStream recv for input text: [{input_text}] got sentence: [{sentence}]" ) @@ -302,12 +303,12 @@ output_data.set_property_bool( DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False ) - ten.send_data(output_data) - logger.info( + ten_env.send_data(output_data) + ten_env.log_info( f"GetConverseStream recv for input text: [{input_text}] sent sentence [{sentence}]" ) except Exception as err: - logger.info( + ten_env.log_info( f"GetConverseStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}" ) break @@ -315,7 +316,7 @@ sentence = "" if not first_sentence_sent: first_sentence_sent = True - logger.info( + ten_env.log_info( f"GetConverseStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms" ) @@ -329,7 +330,7 @@ ) else: # can not put empty model response into memory - logger.error( + ten_env.log_error( f"GetConverseStream recv for input text: [{input_text}] failed: empty response [{full_content}]" ) return @@ -343,17 +344,17 @@ output_data.set_property_bool( DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True ) - ten.send_data(output_data) - logger.info( + ten_env.send_data(output_data) + ten_env.log_info( f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent" ) except Exception as err: - logger.info( + ten_env.log_info( f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}" )
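# Editor's aside: converse_stream_worker's interrupt handling above hinges on
# comparing the worker's start timestamp against self.outdate_ts, which the
# CMD_IN_FLUSH handler bumps. A minimal, self-contained sketch of the same
# pattern follows; the names are hypothetical, now() stands in for whatever
# clock get_current_time() uses (the units just have to match), and the
# 100_000 grace constant is copied from the worker, whose comment calls it a
# "100ms buffer".

import time

def now() -> int:
    # Stand-in clock; real code must use the same units as get_current_time().
    return int(time.time() * 1_000_000)

class FlushableStream:
    GRACE = 100_000  # drop-grace window, same constant as in the worker above

    def __init__(self) -> None:
        self.outdate_ts = 0  # bumped by the flush handler

    def on_flush(self) -> None:
        # Everything that started before this instant becomes outdated.
        self.outdate_ts = now()

    def consume(self, start_time: int, events):
        for event in events:
            if (start_time + self.GRACE) < self.outdate_ts:
                break  # a flush arrived after this stream started; stop emitting
            yield event

# Usage: s = FlushableStream(); list(s.consume(now(), iter(chunks)))
# A later s.on_flush() makes any in-flight consume() stop at its next event.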
except Exception as e: - logger.info( + ten_env.log_info( f"GetConverseStream for input text: [{input_text}] failed, err: {e}" ) @@ -363,11 +364,11 @@ def converse_stream_worker(start_time, input_text, memory): target=converse_stream_worker, args=(start_time, input_text, self.memory) ) thread.start() - logger.info(f"BedrockLLMExtension on_data end") + ten_env.log_info("BedrockLLMExtension on_data end") @register_addon_as_extension("bedrock_llm_python") class BedrockLLMExtensionAddon(Addon): def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: - logger.info("on_create_instance") + ten.log_info("on_create_instance") ten.on_create_instance_done(BedrockLLMExtension(addon_name), context) diff --git a/agents/ten_packages/extension/bedrock_llm_python/log.py b/agents/ten_packages/extension/bedrock_llm_python/log.py deleted file mode 100644 index 7261cd01..00000000 --- a/agents/ten_packages/extension/bedrock_llm_python/log.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -logger = logging.getLogger("bedrock_llm_python") -logger.setLevel(logging.INFO) - -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" -) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/bedrock_llm_python/manifest.json b/agents/ten_packages/extension/bedrock_llm_python/manifest.json index a031ec3d..41eda41e 100644 --- a/agents/ten_packages/extension/bedrock_llm_python/manifest.json +++ b/agents/ten_packages/extension/bedrock_llm_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "bedrock_llm_python", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/bingsearch_tool_python/BUILD.gn b/agents/ten_packages/extension/bingsearch_tool_python/BUILD.gn index 6fe94dbb..68d4594a 100644 --- a/agents/ten_packages/extension/bingsearch_tool_python/BUILD.gn +++ b/agents/ten_packages/extension/bingsearch_tool_python/BUILD.gn @@ -14,7 +14,6 @@ ten_package("bingsearch_tool_python") { "__init__.py", "addon.py", "extension.py", - "log.py", "manifest.json", "property.json", ] diff --git a/agents/ten_packages/extension/bingsearch_tool_python/__init__.py b/agents/ten_packages/extension/bingsearch_tool_python/__init__.py index 57f5e81b..8cd75dde 100644 --- a/agents/ten_packages/extension/bingsearch_tool_python/__init__.py +++ b/agents/ten_packages/extension/bingsearch_tool_python/__init__.py @@ -6,6 +6,3 @@ # # from . 
import addon -from .log import logger - -logger.info("bingsearch_tool_python extension loaded") diff --git a/agents/ten_packages/extension/bingsearch_tool_python/addon.py b/agents/ten_packages/extension/bingsearch_tool_python/addon.py index 93f4e691..4d066229 100644 --- a/agents/ten_packages/extension/bingsearch_tool_python/addon.py +++ b/agents/ten_packages/extension/bingsearch_tool_python/addon.py @@ -17,6 +17,5 @@ class BingSearchToolExtensionAddon(Addon): def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: from .extension import BingSearchToolExtension - from .log import logger - logger.info("BingSearchToolExtensionAddon on_create_instance") + ten_env.log_info("BingSearchToolExtensionAddon on_create_instance") ten_env.on_create_instance_done(BingSearchToolExtension(name), context) diff --git a/agents/ten_packages/extension/bingsearch_tool_python/extension.py b/agents/ten_packages/extension/bingsearch_tool_python/extension.py index 515f24e4..0c3587df 100644 --- a/agents/ten_packages/extension/bingsearch_tool_python/extension.py +++ b/agents/ten_packages/extension/bingsearch_tool_python/extension.py @@ -7,24 +7,15 @@ # import json import aiohttp -import requests from typing import Any, List from ten import ( - AudioFrame, - VideoFrame, - Extension, - TenEnv, Cmd, - StatusCode, - CmdResult, - Data, ) from ten.async_ten_env import AsyncTenEnv from ten_ai_base.helper import get_properties_string -from ten_ai_base.llm_tool import AsyncLLMToolBaseExtension +from ten_ai_base import AsyncLLMToolBaseExtension from ten_ai_base.types import LLMToolMetadata, LLMToolMetadataParameter, LLMToolResult -from .log import logger CMD_TOOL_REGISTER = "tool_register" CMD_TOOL_CALL = "tool_call" @@ -41,10 +32,10 @@ TOOL_PARAMETERS = { "type": "object", "properties": { - "query": { - "type": "string", - "description": "The search query to call Bing Search." 
- } + "query": { + "type": "string", + "description": "The search query to call Bing Search.", + } }, "required": ["query"], } @@ -89,15 +80,16 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: await super().on_start(ten_env) get_properties_string( - ten_env, [PROPERTY_API_KEY], lambda name, value: setattr(self, name, value)) + ten_env, [PROPERTY_API_KEY], lambda name, value: setattr(self, name, value) + ) if not self.api_key: - ten_env.log_info(f"API key is missing, exiting on_start") + ten_env.log_info("API key is missing, exiting on_start") return async def on_stop(self, ten_env: AsyncTenEnv) -> None: ten_env.log_debug("on_stop") - # TODO: clean up resources + # clean up resources if self.session and not self.session.closed: await self.session.close() self.session = None # Ensure it can't be reused accidentally @@ -124,19 +116,21 @@ def get_tool_metadata(self, ten_env: AsyncTenEnv) -> list[LLMToolMetadata]: ) ] - async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult: + async def run_tool( + self, ten_env: AsyncTenEnv, name: str, args: dict + ) -> LLMToolResult | None: if name == TOOL_NAME: - result = await self._do_search(args) + result = await self._do_search(ten_env, args) # result = LLMCompletionContentItemText(text="I see something") return {"content": json.dumps(result)} - async def _do_search(self, args: dict) -> Any: + async def _do_search(self, ten_env: AsyncTenEnv, args: dict) -> Any: if "query" not in args: - raise Exception("Failed to get property") + raise ValueError("Failed to get property") query = args["query"] snippets = [] - results = await self._bing_search_results(query, count=self.k) + results = await self._bing_search_results(ten_env, query, count=self.k) if len(results) == 0: return "No good Bing Search Result was found" @@ -145,23 +139,25 @@ async def _do_search(self, args: dict) -> Any: return snippets - async def _initialize_session(self): + async def _initialize_session(self, ten_env: AsyncTenEnv): if self.session is None or self.session.closed: - logger.debug("Initializing new session") + ten_env.log_debug("Initializing new session") self.session = aiohttp.ClientSession() - async def _bing_search_results(self, search_term: str, count: int) -> List[dict]: - await self._initialize_session() + async def _bing_search_results(self, ten_env: AsyncTenEnv, search_term: str, count: int) -> List[dict]: + await self._initialize_session(ten_env) headers = {"Ocp-Apim-Subscription-Key": self.api_key} params = { "q": search_term, "count": count, "textDecorations": "true", - "textFormat": "HTML" + "textFormat": "HTML", } async with self.session as session: - async with session.get(DEFAULT_BING_SEARCH_ENDPOINT, headers=headers, params=params) as response: + async with session.get( + DEFAULT_BING_SEARCH_ENDPOINT, headers=headers, params=params + ) as response: response.raise_for_status() search_results = await response.json() diff --git a/agents/ten_packages/extension/bingsearch_tool_python/log.py b/agents/ten_packages/extension/bingsearch_tool_python/log.py deleted file mode 100644 index 82cb60b5..00000000 --- a/agents/ten_packages/extension/bingsearch_tool_python/log.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# -# Agora Real Time Engagement -# Created by Wei Hu in 2024-08. -# Copyright (c) 2024 Agora IO. All rights reserved. 
-# -# -import logging - -logger = logging.getLogger("bingsearch_tool_python") -logger.setLevel(logging.INFO) - -formatter_str = ( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - " - "[%(filename)s:%(lineno)d] - %(message)s" -) -formatter = logging.Formatter(formatter_str) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/bingsearch_tool_python/manifest.json b/agents/ten_packages/extension/bingsearch_tool_python/manifest.json index bba45926..600f0039 100644 --- a/agents/ten_packages/extension/bingsearch_tool_python/manifest.json +++ b/agents/ten_packages/extension/bingsearch_tool_python/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { @@ -29,21 +29,30 @@ { "name": "tool_register", "property": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "string" + "tool": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "required": [ + "name", + "description", + "parameters" + ] } }, - "required": [ - "name", - "description", - "parameters" - ], "result": { "property": { "response": { diff --git a/agents/ten_packages/extension/cartesia_tts/extension.py b/agents/ten_packages/extension/cartesia_tts/extension.py index 5729cfe6..03a36b98 100644 --- a/agents/ten_packages/extension/cartesia_tts/extension.py +++ b/agents/ten_packages/extension/cartesia_tts/extension.py @@ -11,6 +11,7 @@ ) from ten_ai_base.tts import AsyncTTSBaseExtension + class CartesiaTTSExtension(AsyncTTSBaseExtension): def __init__(self, name: str) -> None: super().__init__(name) @@ -25,13 +26,13 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: try: await super().on_start(ten_env) ten_env.log_debug("on_start") - self.config = CartesiaTTSConfig.create(ten_env=ten_env) + self.config = await CartesiaTTSConfig.create_async(ten_env=ten_env) if not self.config.api_key: raise ValueError("api_key is required") self.client = CartesiaTTS(self.config) - except Exception as err: + except Exception: ten_env.log_error(f"on_start failed: {traceback.format_exc()}") async def on_stop(self, ten_env: AsyncTenEnv) -> None: @@ -42,11 +43,13 @@ async def on_deinit(self, ten_env: AsyncTenEnv) -> None: await super().on_deinit(ten_env) ten_env.log_debug("on_deinit") - async def on_request_tts(self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool) -> None: + async def on_request_tts( + self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool + ) -> None: audio_stream = await self.client.text_to_speech_stream(input_text) async for audio_data in audio_stream: - self.send_audio_out(ten_env, audio_data["audio"]) + await self.send_audio_out(ten_env, audio_data["audio"]) async def on_cancel_tts(self, ten_env: AsyncTenEnv) -> None: - return await super().on_cancel_tts(ten_env) \ No newline at end of file + return await super().on_cancel_tts(ten_env) diff --git a/agents/ten_packages/extension/cartesia_tts/manifest.json b/agents/ten_packages/extension/cartesia_tts/manifest.json index 85912f14..df2b50a4 100644 --- a/agents/ten_packages/extension/cartesia_tts/manifest.json +++ b/agents/ten_packages/extension/cartesia_tts/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "cartesia_tts", - 
"version": "0.4.2", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4.2" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/chat_transcriber_python/__init__.py b/agents/ten_packages/extension/chat_transcriber_python/__init__.py deleted file mode 100644 index 3a570f49..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from . import chat_transcriber_addon -from .log import logger - -logger.info("chat_transcriber_python extension loaded") diff --git a/agents/ten_packages/extension/chat_transcriber_python/chat_transcriber_addon.py b/agents/ten_packages/extension/chat_transcriber_python/chat_transcriber_addon.py deleted file mode 100644 index 12b36ce8..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/chat_transcriber_addon.py +++ /dev/null @@ -1,17 +0,0 @@ -from ten import ( - Addon, - register_addon_as_extension, - TenEnv, -) - - -@register_addon_as_extension("chat_transcriber_python") -class ChatTranscriberExtensionAddon(Addon): - def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: - from .log import logger - logger.info("on_create_instance") - - from .chat_transcriber_extension import ChatTranscriberExtension - - ten.on_create_instance_done(ChatTranscriberExtension(addon_name), context) - diff --git a/agents/ten_packages/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/ten_packages/extension/chat_transcriber_python/chat_transcriber_extension.py deleted file mode 100644 index 41a39bec..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/chat_transcriber_extension.py +++ /dev/null @@ -1,136 +0,0 @@ -# -# -# Agora Real Time Engagement -# Created by XinHui Li in 2024-07. -# Copyright (c) 2024 Agora IO. All rights reserved. -# -# - -import json -from ten import ( - Extension, - TenEnv, - Cmd, - Data, - StatusCode, - CmdResult, -) -import time -from .pb import chat_text_pb2 as pb -from .log import logger - -CMD_NAME_FLUSH = "flush" - -TEXT_DATA_TEXT_FIELD = "text" -TEXT_DATA_FINAL_FIELD = "is_final" -TEXT_DATA_STREAM_ID_FIELD = "stream_id" -TEXT_DATA_END_OF_SEGMENT_FIELD = "end_of_segment" - -# record the cached text data for each stream id -cached_text_map = {} - - -class ChatTranscriberExtension(Extension): - def on_start(self, ten: TenEnv) -> None: - logger.info("on_start") - ten.on_start_done() - - def on_stop(self, ten: TenEnv) -> None: - logger.info("on_stop") - ten.on_stop_done() - - def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: - logger.info("on_cmd") - cmd_json = cmd.to_json() - logger.info("on_cmd json: {}".format(cmd_json)) - - cmd_result = CmdResult.create(StatusCode.OK) - cmd_result.set_property_string("detail", "success") - ten.return_result(cmd_result, cmd) - - def on_data(self, ten: TenEnv, data: Data) -> None: - """ - on_data receives data from ten graph. 
- current suppotend data: - - name: text_data - example: - {"name": "text_data", "properties": {"text": "hello", "is_final": true, "stream_id": 123, "end_of_segment": true}} - """ - logger.info(f"on_data") - - try: - text = data.get_property_string(TEXT_DATA_TEXT_FIELD) - except Exception as e: - logger.exception( - f"on_data get_property_string {TEXT_DATA_TEXT_FIELD} error: {e}" - ) - return - - try: - final = data.get_property_bool(TEXT_DATA_FINAL_FIELD) - except Exception as e: - logger.exception( - f"on_data get_property_bool {TEXT_DATA_FINAL_FIELD} error: {e}" - ) - return - - try: - stream_id = data.get_property_int(TEXT_DATA_STREAM_ID_FIELD) - except Exception as e: - logger.exception( - f"on_data get_property_int {TEXT_DATA_STREAM_ID_FIELD} error: {e}" - ) - return - - try: - end_of_segment = data.get_property_bool(TEXT_DATA_END_OF_SEGMENT_FIELD) - except Exception as e: - logger.exception( - f"on_data get_property_bool {TEXT_DATA_END_OF_SEGMENT_FIELD} error: {e}" - ) - return - - logger.debug( - f"on_data {TEXT_DATA_TEXT_FIELD}: {text} {TEXT_DATA_FINAL_FIELD}: {final} {TEXT_DATA_STREAM_ID_FIELD}: {stream_id} {TEXT_DATA_END_OF_SEGMENT_FIELD}: {end_of_segment}" - ) - - # We cache all final text data and append the non-final text data to the cached data - # until the end of the segment. - if end_of_segment: - if stream_id in cached_text_map: - text = cached_text_map[stream_id] + text - del cached_text_map[stream_id] - else: - if final: - if stream_id in cached_text_map: - text = cached_text_map[stream_id] + text - - cached_text_map[stream_id] = text - - pb_text = pb.Text( - uid=stream_id, - data_type="transcribe", - texttime=int(time.time() * 1000), # Convert to milliseconds - words=[ - pb.Word( - text=text, - is_final=end_of_segment, - ), - ], - ) - - try: - pb_serialized_text = pb_text.SerializeToString() - except Exception as e: - logger.warning(f"on_data SerializeToString error: {e}") - return - - try: - # convert the origin text data to the protobuf data and send it to the graph. 
- ten_data = Data.create("data") - ten_data.set_property_buf("data", pb_serialized_text) - ten.send_data(ten_data) - logger.info("data sent") - except Exception as e: - logger.warning(f"on_data new_data error: {e}") - return diff --git a/agents/ten_packages/extension/chat_transcriber_python/log.py b/agents/ten_packages/extension/chat_transcriber_python/log.py deleted file mode 100644 index e30358c1..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/log.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -logger = logging.getLogger("chat_transcriber_python") -logger.setLevel(logging.INFO) - -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" -) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/chat_transcriber_python/manifest.json b/agents/ten_packages/extension/chat_transcriber_python/manifest.json deleted file mode 100644 index 6354d8fb..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/manifest.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "type": "extension", - "name": "chat_transcriber_python", - "version": "0.4.0", - "dependencies": [ - { - "type": "system", - "name": "ten_runtime_python", - "version": "0.4" - } - ], - "api": { - "property": {}, - "data_in": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - }, - "is_final": { - "type": "bool" - }, - "stream_id": { - "type": "uint32" - }, - "end_of_segment": { - "type": "bool" - } - } - } - ], - "data_out": [ - { - "name": "data" - } - ] - } -} \ No newline at end of file diff --git a/agents/ten_packages/extension/chat_transcriber_python/pb/chat_text.proto b/agents/ten_packages/extension/chat_transcriber_python/pb/chat_text.proto deleted file mode 100644 index c770eb49..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/pb/chat_text.proto +++ /dev/null @@ -1,37 +0,0 @@ -syntax = "proto3"; - -package agora.chat_transcriber_python; -option go_package = ".;pb"; - -message Text { - int32 vendor = 1; - int32 version = 2; - int32 seqnum = 3; - int32 uid = 4; - int32 flag = 5; - int64 time = 6; // final time =first nofinal time - int32 lang = 7; - int32 starttime = 8; - int32 offtime = 9; - repeated Word words = 10; - bool end_of_segment = 11; - int32 duration_ms = 12; - string data_type = 13; // transcribe ,translate - repeated Translation trans = 14; - string culture = 15; - int64 texttime = 16; // pkg timestamp -} - -message Word { - string text = 1; - int32 start_ms = 2; - int32 duration_ms = 3; - bool is_final = 4; - double confidence = 5; -} - -message Translation { - bool is_final = 1; - string lang = 2; - repeated string texts = 3; -} \ No newline at end of file diff --git a/agents/ten_packages/extension/chat_transcriber_python/pb/chat_text_pb2.py b/agents/ten_packages/extension/chat_transcriber_python/pb/chat_text_pb2.py deleted file mode 100644 index 1ef2e5d0..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/pb/chat_text_pb2.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: chat_text.proto -# Protobuf Python Version: 5.26.0 -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x63hat_text.proto\x12\x1d\x61gora.chat_transcriber_python\"\xe4\x02\n\x04Text\x12\x0e\n\x06vendor\x18\x01 \x01(\x05\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12\x0e\n\x06seqnum\x18\x03 \x01(\x05\x12\x0b\n\x03uid\x18\x04 \x01(\x05\x12\x0c\n\x04\x66lag\x18\x05 \x01(\x05\x12\x0c\n\x04time\x18\x06 \x01(\x03\x12\x0c\n\x04lang\x18\x07 \x01(\x05\x12\x11\n\tstarttime\x18\x08 \x01(\x05\x12\x0f\n\x07offtime\x18\t \x01(\x05\x12\x32\n\x05words\x18\n \x03(\x0b\x32#.agora.chat_transcriber_python.Word\x12\x16\n\x0e\x65nd_of_segment\x18\x0b \x01(\x08\x12\x13\n\x0b\x64uration_ms\x18\x0c \x01(\x05\x12\x11\n\tdata_type\x18\r \x01(\t\x12\x39\n\x05trans\x18\x0e \x03(\x0b\x32*.agora.chat_transcriber_python.Translation\x12\x0f\n\x07\x63ulture\x18\x0f \x01(\t\x12\x10\n\x08texttime\x18\x10 \x01(\x03\"a\n\x04Word\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x10\n\x08start_ms\x18\x02 \x01(\x05\x12\x13\n\x0b\x64uration_ms\x18\x03 \x01(\x05\x12\x10\n\x08is_final\x18\x04 \x01(\x08\x12\x12\n\nconfidence\x18\x05 \x01(\x01\"<\n\x0bTranslation\x12\x10\n\x08is_final\x18\x01 \x01(\x08\x12\x0c\n\x04lang\x18\x02 \x01(\t\x12\r\n\x05texts\x18\x03 \x03(\tB\x06Z\x04.;pbb\x06proto3') - -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'chat_text_pb2', _globals) -if not _descriptor._USE_C_DESCRIPTORS: - _globals['DESCRIPTOR']._loaded_options = None - _globals['DESCRIPTOR']._serialized_options = b'Z\004.;pb' - _globals['_TEXT']._serialized_start=51 - _globals['_TEXT']._serialized_end=407 - _globals['_WORD']._serialized_start=409 - _globals['_WORD']._serialized_end=506 - _globals['_TRANSLATION']._serialized_start=508 - _globals['_TRANSLATION']._serialized_end=568 -# @@protoc_insertion_point(module_scope) diff --git a/agents/ten_packages/extension/chat_transcriber_python/property.json b/agents/ten_packages/extension/chat_transcriber_python/property.json deleted file mode 100644 index 9e26dfee..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/property.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/agents/ten_packages/extension/chat_transcriber_python/requirements.txt b/agents/ten_packages/extension/chat_transcriber_python/requirements.txt deleted file mode 100644 index eee905ed..00000000 --- a/agents/ten_packages/extension/chat_transcriber_python/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -protobuf~=4.25.4 -google==3.0.0 \ No newline at end of file diff --git a/agents/ten_packages/extension/cosy_tts_python/cosy_tts.py b/agents/ten_packages/extension/cosy_tts_python/cosy_tts.py index 65e3e866..3fc5391a 100644 --- a/agents/ten_packages/extension/cosy_tts_python/cosy_tts.py +++ b/agents/ten_packages/extension/cosy_tts_python/cosy_tts.py @@ -1,14 +1,12 @@ import asyncio from dataclasses import dataclass -from typing import AsyncIterator from websocket import WebSocketConnectionClosedException from ten.async_ten_env import AsyncTenEnv from ten_ai_base.config import BaseConfig import dashscope -from 
dashscope.api_entities.dashscope_response import SpeechSynthesisResponse -from dashscope.audio.tts_v2 import * +from dashscope.audio.tts_v2 import SpeechSynthesizer, AudioFormat, ResultCallback @dataclass @@ -47,7 +45,9 @@ def on_event(self, message: str) -> None: def on_data(self, data: bytes) -> None: if self.closed: - self.ten_env.log_warn(f"received data: {len(data)} bytes but connection was closed") + self.ten_env.log_warn( + f"received data: {len(data)} bytes but connection was closed" + ) return self.ten_env.log_debug(f"received data: {len(data)} bytes") asyncio.run_coroutine_threadsafe(self.queue.put(data), self.loop) @@ -60,10 +60,12 @@ def __init__(self, config: CosyTTSConfig) -> None: self.queue = asyncio.Queue() dashscope.api_key = config.api_key - def _create_synthesizer(self, ten_env: AsyncTenEnv, callback: AsyncIteratorCallback): + def _create_synthesizer( + self, ten_env: AsyncTenEnv, callback: AsyncIteratorCallback + ): if self.synthesizer: self.synthesizer = None - + ten_env.log_info("Creating new synthesizer") self.synthesizer = SpeechSynthesizer( model=self.config.model, diff --git a/agents/ten_packages/extension/cosy_tts_python/extension.py b/agents/ten_packages/extension/cosy_tts_python/extension.py index 63041186..982476cf 100644 --- a/agents/ten_packages/extension/cosy_tts_python/extension.py +++ b/agents/ten_packages/extension/cosy_tts_python/extension.py @@ -3,7 +3,6 @@ # Licensed under the Apache License, Version 2.0. # See the LICENSE file for more information. # -from asyncio import sleep import asyncio from .cosy_tts import CosyTTS, CosyTTSConfig from ten import ( @@ -26,12 +25,11 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: await super().on_start(ten_env) ten_env.log_debug("on_start") - self.config = CosyTTSConfig.create(ten_env=ten_env) + self.config = await CosyTTSConfig.create_async(ten_env=ten_env) self.client = CosyTTS(self.config) asyncio.create_task(self._process_audio_data(ten_env)) - async def on_stop(self, ten_env: AsyncTenEnv) -> None: await super().on_stop(ten_env) ten_env.log_debug("on_stop") @@ -49,11 +47,12 @@ async def _process_audio_data(self, ten_env: AsyncTenEnv) -> None: if audio_data is None: break - self.send_audio_out(ten_env, audio_data) - + await self.send_audio_out(ten_env, audio_data) - async def on_request_tts(self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool) -> None: + async def on_request_tts( + self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool + ) -> None: self.client.text_to_speech_stream(ten_env, input_text, end_of_segment) - + async def on_cancel_tts(self, ten_env: AsyncTenEnv) -> None: self.client.cancel(ten_env) diff --git a/agents/ten_packages/extension/cosy_tts_python/manifest.json b/agents/ten_packages/extension/cosy_tts_python/manifest.json index d32d2d2c..30a3da78 100644 --- a/agents/ten_packages/extension/cosy_tts_python/manifest.json +++ b/agents/ten_packages/extension/cosy_tts_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "cosy_tts_python", - "version": "0.4.2", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4.2" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/coze_python_async/BUILD.gn b/agents/ten_packages/extension/coze_python_async/BUILD.gn index 05054fcf..dafb851d 100644 --- a/agents/ten_packages/extension/coze_python_async/BUILD.gn +++ b/agents/ten_packages/extension/coze_python_async/BUILD.gn @@ -12,9 +12,7 @@ ten_package("coze_python_async") { 
"__init__.py", "addon.py", "extension.py", - "log.py", "manifest.json", "property.json", - "tests", ] } diff --git a/agents/ten_packages/extension/coze_python_async/extension.py b/agents/ten_packages/extension/coze_python_async/extension.py index 5548c0df..eb70133e 100644 --- a/agents/ten_packages/extension/coze_python_async/extension.py +++ b/agents/ten_packages/extension/coze_python_async/extension.py @@ -24,9 +24,18 @@ Data, ) -from ten_ai_base import BaseConfig, ChatMemory -from ten_ai_base.llm import AsyncLLMBaseExtension, LLMCallCompletionArgs, LLMDataCompletionArgs, LLMToolMetadata -from ten_ai_base.types import LLMChatCompletionUserMessageParam, LLMToolResult +from ten_ai_base.config import BaseConfig +from ten_ai_base.chat_memory import ChatMemory +from ten_ai_base import ( + AsyncLLMBaseExtension, +) + +from ten_ai_base.types import ( + LLMChatCompletionUserMessageParam, + LLMCallCompletionArgs, + LLMDataCompletionArgs, + LLMToolMetadata, +) CMD_IN_FLUSH = "flush" CMD_IN_ON_USER_JOINED = "on_user_joined" @@ -42,11 +51,13 @@ CMD_PROPERTY_RESULT = "tool_result" + def is_punctuation(char): if char in [",", ",", ".", "。", "?", "?", "!", "!"]: return True return False + def parse_sentences(sentence_fragment, content): sentences = [] current_sentence = sentence_fragment @@ -61,6 +72,7 @@ def parse_sentences(sentence_fragment, content): remain = current_sentence return sentences, remain + @dataclass class CozeConfig(BaseConfig): base_url: str = "https://api.acoze.com" @@ -70,8 +82,9 @@ class CozeConfig(BaseConfig): greeting: str = "" max_history: int = 32 + class AsyncCozeExtension(AsyncLLMBaseExtension): - config : CozeConfig = None + config: CozeConfig = None sentence_fragment: str = "" ten_env: AsyncTenEnv = None loop: asyncio.AbstractEventLoop = None @@ -80,7 +93,7 @@ class AsyncCozeExtension(AsyncLLMBaseExtension): memory: ChatMemory = None acoze: AsyncCoze = None - #conversation: str = "" + # conversation: str = "" async def on_init(self, ten_env: AsyncTenEnv) -> None: await super().on_init(ten_env) @@ -92,21 +105,23 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: self.loop = asyncio.get_event_loop() - self.config = CozeConfig.create(ten_env=ten_env) + self.config = await CozeConfig.create_async(ten_env=ten_env) ten_env.log_info(f"config: {self.config}") if not self.config.bot_id or not self.config.token: ten_env.log_error("Missing required configuration") return - + self.memory = ChatMemory(self.config.max_history) try: - self.acoze = AsyncCoze(auth=TokenAuth(token=self.config.token), base_url=self.config.base_url) - ''' - self.conversation = await self.acoze.conversations.create(messages = [ - Message.build_user_question_text(self.config.prompt) - ] if self.config.prompt else []) - ''' + self.acoze = AsyncCoze( + auth=TokenAuth(token=self.config.token), base_url=self.config.base_url + ) + + # self.conversation = await self.acoze.conversations.create(messages = [ + # Message.build_user_question_text(self.config.prompt) + # ] if self.config.prompt else []) + except Exception as e: ten_env.log_error(f"Failed to create conversation {e}") @@ -146,23 +161,29 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: cmd_result = CmdResult.create(status) cmd_result.set_property_string("detail", detail) - ten_env.return_result(cmd_result, cmd) + await ten_env.return_result(cmd_result, cmd) - async def on_call_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMCallCompletionArgs) -> any: - raise Exception("Not implemented") + async def on_call_chat_completion( + 
self, ten_env: AsyncTenEnv, **kargs: LLMCallCompletionArgs + ) -> any: + raise RuntimeError("Not implemented") - async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs) -> None: + async def on_data_chat_completion( + self, ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs + ) -> None: if not self.acoze: - await self._send_text("Coze is not connected. Please check your configuration.", True) + await self._send_text( + "Coze is not connected. Please check your configuration.", True + ) return - - input: LLMChatCompletionUserMessageParam = kargs.get("messages", []) + + input_messages: LLMChatCompletionUserMessageParam = kargs.get("messages", []) messages = copy.copy(self.memory.get()) - if not input: + if not input_messages: ten_env.log_warn("No message in data") else: - messages.extend(input) - for i in input: + messages.extend(input_messages) + for i in input_messages: self.memory.put(i) total_output = "" @@ -178,7 +199,8 @@ async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCo if message.event == ChatEventType.CONVERSATION_MESSAGE_DELTA: total_output += message.message.content sentences, sentence_fragment = parse_sentences( - sentence_fragment, message.message.content) + sentence_fragment, message.message.content + ) for s in sentences: await self._send_text(s, False) elif message.event == ChatEventType.CONVERSATION_MESSAGE_COMPLETED: @@ -189,17 +211,22 @@ async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCo elif message.event == ChatEventType.CONVERSATION_CHAT_FAILED: last_error = message.chat.last_error if last_error and last_error.code == 4011: - await self._send_text("The Coze token has been depleted. Please check your token usage.", True) + await self._send_text( + "The Coze token has been depleted. 
Please check your token usage.", + True, + ) else: await self._send_text(last_error.msg, True) except Exception as e: self.ten_env.log_error(f"Failed to parse response: {message} {e}") traceback.print_exc() - + self.memory.put({"role": "assistant", "content": total_output}) self.ten_env.log_info(f"total_output: {total_output} {calls}") - async def on_tools_update(self, ten_env: AsyncTenEnv, tool: LLMToolMetadata) -> None: + async def on_tools_update( + self, ten_env: AsyncTenEnv, tool: LLMToolMetadata + ) -> None: # Implement the logic for tool updates return await super().on_tools_update(ten_env, tool) @@ -212,12 +239,16 @@ async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: try: is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) except Exception as err: - ten_env.log_info(f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") + ten_env.log_info( + f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" + ) try: input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) except Exception as err: - ten_env.log_info(f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") + ten_env.log_info( + f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" + ) if not is_final: ten_env.log_info("ignore non-final input") @@ -229,40 +260,53 @@ async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: ten_env.log_info(f"OnData input text: [{input_text}]") # Start an asynchronous task for handling chat completion - message = LLMChatCompletionUserMessageParam( - role="user", content=input_text) + message = LLMChatCompletionUserMessageParam(role="user", content=input_text) await self.queue_input_item(False, messages=[message]) - async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None: + async def on_audio_frame( + self, ten_env: AsyncTenEnv, audio_frame: AudioFrame + ) -> None: pass - async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None: + async def on_video_frame( + self, ten_env: AsyncTenEnv, video_frame: VideoFrame + ) -> None: pass async def _send_text(self, text: str, end_of_segment: bool) -> None: data = Data.create("text_data") data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, text) - data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT, end_of_segment) - self.ten_env.send_data(data) - - async def _stream_chat(self, messages: List[Any]) -> AsyncGenerator[ChatEvent, None]: + data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT, end_of_segment + ) + asyncio.create_task(self.ten_env.send_data(data)) + + async def _stream_chat( + self, messages: List[Any] + ) -> AsyncGenerator[ChatEvent, None]: additionals = [] for m in messages: if m["role"] == "user": - additionals.append(Message.build_user_question_text(m["content"]).model_dump()) + additionals.append( + Message.build_user_question_text(m["content"]).model_dump() + ) elif m["role"] == "assistant": - additionals.append(Message.build_assistant_answer(m["content"]).model_dump()) + additionals.append( + Message.build_assistant_answer(m["content"]).model_dump() + ) - def chat_stream_handler(event:str, event_data:Any) -> ChatEvent: + def chat_stream_handler(event: str, event_data: Any) -> ChatEvent: if event == ChatEventType.DONE: raise StopAsyncIteration elif event == ChatEventType.ERROR: - raise Exception(f"error event: {event_data}") # TODO: error struct format + raise RuntimeError(f"error event: 
{event_data}") elif event in [ ChatEventType.CONVERSATION_MESSAGE_DELTA, ChatEventType.CONVERSATION_MESSAGE_COMPLETED, ]: - return ChatEvent(event=event, message=Message.model_validate_json(event_data)) + return ChatEvent( + event=event, message=Message.model_validate_json(event_data) + ) elif event in [ ChatEventType.CONVERSATION_CHAT_CREATED, ChatEventType.CONVERSATION_CHAT_IN_PROGRESS, @@ -273,7 +317,7 @@ def chat_stream_handler(event:str, event_data:Any) -> ChatEvent: return ChatEvent(event=event, chat=Chat.model_validate_json(event_data)) else: raise ValueError(f"invalid chat.event: {event}, {event_data}") - + async with aiohttp.ClientSession() as session: try: url = f"{self.config.base_url}/v3/chat" @@ -286,7 +330,7 @@ def chat_stream_handler(event:str, event_data:Any) -> ChatEvent: "additional_messages": additionals, "stream": True, "auto_save_history": True, - #"conversation_id": self.conversation.id + # "conversation_id": self.conversation.id } event = "" async with session.post(url, json=params, headers=headers) as response: @@ -294,11 +338,13 @@ def chat_stream_handler(event:str, event_data:Any) -> ChatEvent: if line: try: self.ten_env.log_info(f"line: {line}") - decoded_line = line.decode('utf-8').strip() + decoded_line = line.decode("utf-8").strip() if decoded_line: if decoded_line.startswith("data:"): data = decoded_line[5:].strip() - yield chat_stream_handler(event=event, event_data=data.strip()) + yield chat_stream_handler( + event=event, event_data=data.strip() + ) elif decoded_line.startswith("event:"): event = decoded_line[6:] self.ten_env.log_info(f"event: {event}") @@ -308,10 +354,17 @@ def chat_stream_handler(event:str, event_data:Any) -> ChatEvent: result = json.loads(decoded_line) code = result.get("code", 0) if code == 4000: - await self._send_text("Coze bot is not published.", True) + await self._send_text( + "Coze bot is not published.", True + ) else: - self.ten_env.log_error(f"Failed to stream chat: {result['code']}") - await self._send_text("Coze bot is not connected. Please check your configuration.", True) + self.ten_env.log_error( + f"Failed to stream chat: {result['code']}" + ) + await self._send_text( + "Coze bot is not connected. 
Please check your configuration.", + True, + ) except Exception as e: self.ten_env.log_error(f"Failed to stream chat: {e}") except Exception as e: diff --git a/agents/ten_packages/extension/coze_python_async/manifest.json b/agents/ten_packages/extension/coze_python_async/manifest.json index faa66c06..18379f4f 100644 --- a/agents/ten_packages/extension/coze_python_async/manifest.json +++ b/agents/ten_packages/extension/coze_python_async/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "coze_python_async", - "version": "0.3.1", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/deepgram_asr_python/extension.py b/agents/ten_packages/extension/deepgram_asr_python/extension.py index df357787..48e516d5 100644 --- a/agents/ten_packages/extension/deepgram_asr_python/extension.py +++ b/agents/ten_packages/extension/deepgram_asr_python/extension.py @@ -10,16 +10,22 @@ import asyncio -from deepgram import AsyncListenWebSocketClient, DeepgramClientOptions, LiveTranscriptionEvents, LiveOptions +from deepgram import ( + AsyncListenWebSocketClient, + DeepgramClientOptions, + LiveTranscriptionEvents, + LiveOptions, +) from dataclasses import dataclass -from ten_ai_base import BaseConfig +from ten_ai_base.config import BaseConfig DATA_OUT_TEXT_DATA_PROPERTY_TEXT = "text" DATA_OUT_TEXT_DATA_PROPERTY_IS_FINAL = "is_final" DATA_OUT_TEXT_DATA_PROPERTY_STREAM_ID = "stream_id" DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT = "end_of_segment" + @dataclass class DeepgramASRConfig(BaseConfig): api_key: str = "" @@ -28,19 +34,22 @@ class DeepgramASRConfig(BaseConfig): sample_rate: int = 16000 channels: int = 1 - encoding: str = 'linear16' + encoding: str = "linear16" interim_results: bool = True punctuate: bool = True - + + class DeepgramASRExtension(AsyncExtension): def __init__(self, name: str): super().__init__(name) self.stopped = False self.connected = False - self.client : AsyncListenWebSocketClient = None - self.config : DeepgramASRConfig = None - self.ten_env : AsyncTenEnv = None + self.client: AsyncListenWebSocketClient = None + self.config: DeepgramASRConfig = None + self.ten_env: AsyncTenEnv = None + self.loop = None + self.stream_id = -1 async def on_init(self, ten_env: AsyncTenEnv) -> None: ten_env.log_info("DeepgramASRExtension on_init") @@ -50,32 +59,32 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: self.loop = asyncio.get_event_loop() self.ten_env = ten_env - self.config = DeepgramASRConfig.create(ten_env=ten_env) + self.config = await DeepgramASRConfig.create_async(ten_env=ten_env) ten_env.log_info(f"config: {self.config}") if not self.config.api_key: - ten_env.log_error(f"get property api_key") + ten_env.log_error("get property api_key") return self.loop.create_task(self._start_listen()) ten_env.log_info("starting async_deepgram_wrapper thread") - async def on_audio_frame(self, ten_env: AsyncTenEnv, frame: AudioFrame) -> None: + async def on_audio_frame(self, _: AsyncTenEnv, frame: AudioFrame) -> None: frame_buf = frame.get_buf() if not frame_buf: self.ten_env.log_warn("send_frame: empty pcm_frame detected.") return - + if not self.connected: self.ten_env.log_debug("send_frame: deepgram not connected.") return - self.stream_id = frame.get_property_int('stream_id') + self.stream_id = frame.get_property_int("stream_id") if self.client: await self.client.send(frame_buf) - + async def on_stop(self, ten_env: AsyncTenEnv) -> None: 
ten_env.log_info("on_stop") @@ -90,40 +99,47 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") - ten_env.return_result(cmd_result, cmd) - + await ten_env.return_result(cmd_result, cmd) + async def _start_listen(self) -> None: - self.ten_env.log_info(f"start and listen deepgram") + self.ten_env.log_info("start and listen deepgram") - self.client = AsyncListenWebSocketClient(config=DeepgramClientOptions( - api_key=self.config.api_key, - options={"keepalive": "true"} - )) + self.client = AsyncListenWebSocketClient( + config=DeepgramClientOptions( + api_key=self.config.api_key, options={"keepalive": "true"} + ) + ) - async def on_open(_, open, **kwargs): - self.ten_env.log_info(f"deepgram event callback on_open: {open}") + async def on_open(_, event): + self.ten_env.log_info(f"deepgram event callback on_open: {event}") self.connected = True - async def on_close(_, close, **kwargs): - self.ten_env.log_info(f"deepgram event callback on_close: {close}") + async def on_close(_, event): + self.ten_env.log_info(f"deepgram event callback on_close: {event}") self.connected = False if not self.stopped: - self.ten_env.log_warn("Deepgram connection closed unexpectedly. Reconnecting...") + self.ten_env.log_warn( + "Deepgram connection closed unexpectedly. Reconnecting..." + ) await asyncio.sleep(0.2) self.loop.create_task(self._start_listen()) - async def on_message(_, result, **kwargs): + async def on_message(_, result): sentence = result.channel.alternatives[0].transcript if len(sentence) == 0: return is_final = result.is_final - self.ten_env.log_info(f"deepgram got sentence: [{sentence}], is_final: {is_final}, stream_id: {self.stream_id}") + self.ten_env.log_info( + f"deepgram got sentence: [{sentence}], is_final: {is_final}, stream_id: {self.stream_id}" + ) - await self._send_text(text=sentence, is_final=is_final, stream_id=self.stream_id) + await self._send_text( + text=sentence, is_final=is_final, stream_id=self.stream_id + ) - async def on_error(_, error, **kwargs): + async def on_error(_, error): self.ten_env.log_error(f"deepgram event callback on_error: {error}") self.client.on(LiveTranscriptionEvents.Open, on_open) @@ -131,26 +147,32 @@ async def on_error(_, error, **kwargs): self.client.on(LiveTranscriptionEvents.Transcript, on_message) self.client.on(LiveTranscriptionEvents.Error, on_error) - options = LiveOptions(language=self.config.language, - model=self.config.model, - sample_rate=self.config.sample_rate, - channels=self.config.channels, - encoding=self.config.encoding, - interim_results=self.config.interim_results, - punctuate=self.config.punctuate) + options = LiveOptions( + language=self.config.language, + model=self.config.model, + sample_rate=self.config.sample_rate, + channels=self.config.channels, + encoding=self.config.encoding, + interim_results=self.config.interim_results, + punctuate=self.config.punctuate, + ) + + self.ten_env.log_info(f"deepgram options: {options}") # connect to websocket result = await self.client.start(options) if not result: - self.ten_env.log_error(f"failed to connect to deepgram") + self.ten_env.log_error("failed to connect to deepgram") await asyncio.sleep(0.2) self.loop.create_task(self._start_listen()) else: - self.ten_env.log_info(f"successfully connected to deepgram") - + self.ten_env.log_info("successfully connected to deepgram") + async def _send_text(self, text: str, is_final: bool, stream_id: str) -> None: stable_data = 
Data.create("text_data") stable_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_IS_FINAL, is_final) stable_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, text) stable_data.set_property_int(DATA_OUT_TEXT_DATA_PROPERTY_STREAM_ID, stream_id) - stable_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT, is_final) - self.ten_env.send_data(stable_data) + stable_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT, is_final + ) + asyncio.create_task(self.ten_env.send_data(stable_data)) diff --git a/agents/ten_packages/extension/deepgram_asr_python/manifest.json b/agents/ten_packages/extension/deepgram_asr_python/manifest.json index 0ba17d06..a6b96d85 100644 --- a/agents/ten_packages/extension/deepgram_asr_python/manifest.json +++ b/agents/ten_packages/extension/deepgram_asr_python/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/dify_python/extension.py b/agents/ten_packages/extension/dify_python/extension.py index aed1206d..2d53735c 100644 --- a/agents/ten_packages/extension/dify_python/extension.py +++ b/agents/ten_packages/extension/dify_python/extension.py @@ -4,28 +4,19 @@ # See the LICENSE file for more information. # import asyncio -import traceback -import aiohttp import json -import copy import time - -from typing import List, Any, AsyncGenerator +import traceback from dataclasses import dataclass +from typing import AsyncGenerator -from ten import ( - AudioFrame, - VideoFrame, - AsyncTenEnv, - Cmd, - StatusCode, - CmdResult, - Data, +import aiohttp +from ten import AsyncTenEnv, AudioFrame, Cmd, CmdResult, Data, StatusCode, VideoFrame +from ten_ai_base.config import BaseConfig +from ten_ai_base import ( + AsyncLLMBaseExtension, ) - -from ten_ai_base import BaseConfig, ChatMemory -from ten_ai_base.llm import AsyncLLMBaseExtension, LLMCallCompletionArgs, LLMDataCompletionArgs, LLMToolMetadata -from ten_ai_base.types import LLMChatCompletionUserMessageParam, LLMToolResult +from ten_ai_base.types import LLMChatCompletionUserMessageParam, LLMDataCompletionArgs CMD_IN_FLUSH = "flush" CMD_IN_ON_USER_JOINED = "on_user_joined" @@ -41,11 +32,13 @@ CMD_PROPERTY_RESULT = "tool_result" + def is_punctuation(char): if char in [",", ",", ".", "。", "?", "?", "!", "!"]: return True return False + def parse_sentences(sentence_fragment, content): sentences = [] current_sentence = sentence_fragment @@ -60,6 +53,7 @@ def parse_sentences(sentence_fragment, content): remain = current_sentence return sentences, remain + @dataclass class DifyConfig(BaseConfig): base_url: str = "https://api.dify.ai/v1" @@ -69,6 +63,7 @@ class DifyConfig(BaseConfig): failure_info: str = "" max_history: int = 32 + class DifyExtension(AsyncLLMBaseExtension): config: DifyConfig = None ten_env: AsyncTenEnv = None @@ -86,7 +81,7 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: ten_env.log_debug("on_start") self.loop = asyncio.get_event_loop() - self.config = DifyConfig.create(ten_env=ten_env) + self.config = await DifyConfig.create_async(ten_env=ten_env) ten_env.log_info(f"config: {self.config}") if not self.config.api_key: @@ -129,7 +124,7 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: cmd_result = CmdResult.create(status) cmd_result.set_property_string("detail", detail) - ten_env.return_result(cmd_result, cmd) + await ten_env.return_result(cmd_result, cmd) async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: data_name = 
data.get_name() @@ -140,12 +135,16 @@ async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: try: is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) except Exception as err: - ten_env.log_info(f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") + ten_env.log_info( + f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" + ) try: input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) except Exception as err: - ten_env.log_info(f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") + ten_env.log_info( + f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" + ) if not is_final: ten_env.log_info("ignore non-final input") @@ -157,8 +156,7 @@ async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: ten_env.log_info(f"OnData input text: [{input_text}]") # Start an asynchronous task for handling chat completion - message = LLMChatCompletionUserMessageParam( - role="user", content=input_text) + message = LLMChatCompletionUserMessageParam(role="user", content=input_text) await self.queue_input_item(False, messages=[message]) async def on_audio_frame( @@ -170,10 +168,18 @@ async def on_video_frame( self, ten_env: AsyncTenEnv, video_frame: VideoFrame ) -> None: pass - - async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs) -> None: - input: LLMChatCompletionUserMessageParam = kargs.get("messages", []) - if not input: + + async def on_call_chat_completion(self, async_ten_env, **kargs): + raise NotImplementedError + + async def on_tools_update(self, async_ten_env, tool): + raise NotImplementedError + + async def on_data_chat_completion( + self, ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs + ) -> None: + input_messages: LLMChatCompletionUserMessageParam = kargs.get("messages", []) + if not input_messages: ten_env.log_warn("No message in data") total_output = "" @@ -181,8 +187,8 @@ async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCo calls = {} sentences = [] - self.ten_env.log_info(f"messages: {input}") - response = self._stream_chat(query=input[0]["content"]) + self.ten_env.log_info(f"messages: {input_messages}") + response = self._stream_chat(query=input_messages[0]["content"]) async for message in response: self.ten_env.log_info(f"content: {message}") message_type = message.get("event") @@ -193,37 +199,37 @@ async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCo total_output += message.get("answer", "") sentences, sentence_fragment = parse_sentences( - sentence_fragment, message.get("answer", "")) + sentence_fragment, message.get("answer", "") + ) for s in sentences: await self._send_text(s, False) elif message_type == "message_end": metadata = message.get("metadata", {}) ten_env.log_info(f"metadata: {metadata}") - + # data: {"event": "message", "task_id": "900bbd43-dc0b-4383-a372-aa6e6c414227", "id": "663c5084-a254-4040-8ad3-51f2a3c1a77c", "answer": "Hi", "created_at": 1705398420}\n\n - ''' - try: - if message.event == ChatEventType.CONVERSATION_MESSAGE_DELTA: - total_output += message.message.content - sentences, sentence_fragment = parse_sentences( - sentence_fragment, message.message.content) - for s in sentences: - await self._send_text(s, False) - elif message.event == ChatEventType.CONVERSATION_MESSAGE_COMPLETED: - if sentence_fragment: - await self._send_text(sentence_fragment, True) - else: - await self._send_text("", True) - elif 
message.event == ChatEventType.CONVERSATION_CHAT_FAILED: - last_error = message.chat.last_error - if last_error and last_error.code == 4011: - await self._send_text("The Coze token has been depleted. Please check your token usage.", True) - else: - await self._send_text(last_error.msg, True) - except Exception as e: - self.ten_env.log_error(f"Failed to parse response: {message} {e}") - traceback.print_exc() - ''' + + # try: + # if message.event == ChatEventType.CONVERSATION_MESSAGE_DELTA: + # total_output += message.message.content + # sentences, sentence_fragment = parse_sentences( + # sentence_fragment, message.message.content) + # for s in sentences: + # await self._send_text(s, False) + # elif message.event == ChatEventType.CONVERSATION_MESSAGE_COMPLETED: + # if sentence_fragment: + # await self._send_text(sentence_fragment, True) + # else: + # await self._send_text("", True) + # elif message.event == ChatEventType.CONVERSATION_CHAT_FAILED: + # last_error = message.chat.last_error + # if last_error and last_error.code == 4011: + # await self._send_text("The Coze token has been depleted. Please check your token usage.", True) + # else: + # await self._send_text(last_error.msg, True) + # except Exception as e: + # self.ten_env.log_error(f"Failed to parse response: {message} {e}") + # traceback.print_exc() await self._send_text(sentence_fragment, True) self.ten_env.log_info(f"total_output: {total_output} {calls}") @@ -242,23 +248,25 @@ async def _stream_chat(self, query: str) -> AsyncGenerator[dict, None]: self.ten_env.log_info(f"payload before sending: {json.dumps(payload)}") headers = { "Authorization": f"Bearer {self.config.api_key}", - "Content-Type": "application/json" + "Content-Type": "application/json", } url = f"{self.config.base_url}/chat-messages" start_time = time.time() async with session.post(url, json=payload, headers=headers) as response: if response.status != 200: r = await response.json() - self.ten_env.log_error(f"Received unexpected status {r} from the server.") + self.ten_env.log_error( + f"Received unexpected status {r} from the server." 
+ ) if self.config.failure_info: await self._send_text(self.config.failure_info, True) return end_time = time.time() self.ten_env.log_info(f"connect time {end_time - start_time} s") - + async for line in response.content: if line: - l = line.decode('utf-8').strip() + l = line.decode("utf-8").strip() if l.startswith("data:"): content = l[5:].strip() if content == "[DONE]": @@ -275,5 +283,7 @@ async def _stream_chat(self, query: str) -> AsyncGenerator[dict, None]: async def _send_text(self, text: str, end_of_segment: bool) -> None: data = Data.create("text_data") data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, text) - data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT, end_of_segment) - self.ten_env.send_data(data) + data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT, end_of_segment + ) + asyncio.create_task(self.ten_env.send_data(data)) diff --git a/agents/ten_packages/extension/dify_python/manifest.json b/agents/ten_packages/extension/dify_python/manifest.json index 239a0bb7..331ceaf4 100644 --- a/agents/ten_packages/extension/dify_python/manifest.json +++ b/agents/ten_packages/extension/dify_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "dify_python", - "version": "0.4.2", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4.2" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/elevenlabs_tts/elevenlabs_tts_extension.go b/agents/ten_packages/extension/elevenlabs_tts/elevenlabs_tts_extension.go index d4d4d305..a1085c07 100644 --- a/agents/ten_packages/extension/elevenlabs_tts/elevenlabs_tts_extension.go +++ b/agents/ten_packages/extension/elevenlabs_tts/elevenlabs_tts_extension.go @@ -13,7 +13,6 @@ package extension import ( "fmt" "io" - "log/slog" "sync" "sync/atomic" "time" @@ -42,8 +41,6 @@ const ( ) var ( - logTag = slog.String("extension", "ELEVENLABS_TTS_EXTENSION") - outdateTs atomic.Int64 textChan chan *message wg sync.WaitGroup @@ -76,20 +73,20 @@ func newElevenlabsTTSExtension(name string) ten.Extension { // - style // - voice_id func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { - slog.Info("OnStart", logTag) + ten.LogInfo("OnStart") // prepare configuration elevenlabsTTSConfig := defaultElevenlabsTTSConfig() if apiKey, err := ten.GetPropertyString(propertyApiKey); err != nil { - slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) + ten.LogError(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err)) return } else { elevenlabsTTSConfig.ApiKey = apiKey } if modelId, err := ten.GetPropertyString(propertyModelId); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModelId, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModelId, err)) } else { if len(modelId) > 0 { elevenlabsTTSConfig.ModelId = modelId @@ -97,7 +94,7 @@ func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { } if optimizeStreamingLatency, err := ten.GetPropertyInt64(propertyOptimizeStreamingLatency); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyOptimizeStreamingLatency, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyOptimizeStreamingLatency, err)) } else { if optimizeStreamingLatency > 0 { elevenlabsTTSConfig.OptimizeStreamingLatency = int(optimizeStreamingLatency) @@ -105,7 +102,7 @@ func (e *elevenlabsTTSExtension) 
OnStart(ten ten.TenEnv) { } if requestTimeoutSeconds, err := ten.GetPropertyInt64(propertyRequestTimeoutSeconds); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err)) } else { if requestTimeoutSeconds > 0 { elevenlabsTTSConfig.RequestTimeoutSeconds = int(requestTimeoutSeconds) @@ -113,31 +110,31 @@ func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { } if similarityBoost, err := ten.GetPropertyFloat64(propertySimilarityBoost); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySimilarityBoost, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySimilarityBoost, err)) } else { elevenlabsTTSConfig.SimilarityBoost = float32(similarityBoost) } if speakerBoost, err := ten.GetPropertyBool(propertySpeakerBoost); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySpeakerBoost, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySpeakerBoost, err)) } else { elevenlabsTTSConfig.SpeakerBoost = speakerBoost } if stability, err := ten.GetPropertyFloat64(propertyStability); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyStability, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyStability, err)) } else { elevenlabsTTSConfig.Stability = float32(stability) } if style, err := ten.GetPropertyFloat64(propertyStyle); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyStyle, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyStyle, err)) } else { elevenlabsTTSConfig.Style = float32(style) } if voiceId, err := ten.GetPropertyString(propertyVoiceId); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyVoiceId, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyVoiceId, err)) } else { if len(voiceId) > 0 { elevenlabsTTSConfig.VoiceId = voiceId @@ -147,12 +144,12 @@ func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { // create elevenlabsTTS instance elevenlabsTTS, err := newElevenlabsTTS(elevenlabsTTSConfig) if err != nil { - slog.Error(fmt.Sprintf("newElevenlabsTTS failed, err: %v", err), logTag) + ten.LogError(fmt.Sprintf("newElevenlabsTTS failed, err: %v", err)) return } - slog.Info(fmt.Sprintf("newElevenlabsTTS succeed with ModelId: %s, VoiceId: %s", - elevenlabsTTSConfig.ModelId, elevenlabsTTSConfig.VoiceId), logTag) + ten.LogInfo(fmt.Sprintf("newElevenlabsTTS succeed with ModelId: %s, VoiceId: %s", + elevenlabsTTSConfig.ModelId, elevenlabsTTSConfig.VoiceId)) // set elevenlabsTTS instance e.elevenlabsTTS = elevenlabsTTS @@ -165,17 +162,17 @@ func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { textChan = make(chan *message, textChanMax) go func() { - slog.Info("process textChan", logTag) + ten.LogInfo("process textChan") for msg := range textChan { if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt - slog.Info(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", - msg.text, msg.receivedTs, outdateTs.Load()), logTag) + ten.LogInfo(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", + msg.text, msg.receivedTs, outdateTs.Load())) 
continue } wg.Add(1) - slog.Info(fmt.Sprintf("textChan text: [%s]", msg.text), logTag) + ten.LogInfo(fmt.Sprintf("textChan text: [%s]", msg.text)) r, w := io.Pipe() startTime := time.Now() @@ -184,16 +181,16 @@ func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { defer wg.Done() defer w.Close() - slog.Info(fmt.Sprintf("textToSpeechStream text: [%s]", msg.text), logTag) + ten.LogInfo(fmt.Sprintf("textToSpeechStream text: [%s]", msg.text)) err = e.elevenlabsTTS.textToSpeechStream(w, msg.text) if err != nil { - slog.Error(fmt.Sprintf("textToSpeechStream failed, err: %v", err), logTag) + ten.LogError(fmt.Sprintf("textToSpeechStream failed, err: %v", err)) return } }() - slog.Info(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize), logTag) + ten.LogInfo(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize)) var ( firstFrameLatency int64 @@ -207,8 +204,8 @@ func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { // read pcm stream for { if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt - slog.Info(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", - msg.text, msg.receivedTs, outdateTs.Load()), logTag) + ten.LogInfo(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", + msg.text, msg.receivedTs, outdateTs.Load())) break } @@ -218,16 +215,16 @@ func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { if err != nil { if err == io.EOF { - slog.Info("read pcm stream EOF", logTag) + ten.LogInfo("read pcm stream EOF") break } - slog.Error(fmt.Sprintf("read pcm stream failed, err: %v", err), logTag) + ten.LogError(fmt.Sprintf("read pcm stream failed, err: %v", err)) break } if pcmFrameRead != pcmFrameSize { - slog.Debug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead), logTag) + ten.LogDebug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead)) continue } @@ -239,21 +236,21 @@ func (e *elevenlabsTTSExtension) OnStart(ten ten.TenEnv) { if firstFrameLatency == 0 { firstFrameLatency = time.Since(startTime).Milliseconds() - slog.Info(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency), logTag) + ten.LogInfo(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency)) } - slog.Debug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text), logTag) + ten.LogDebug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text)) } if pcmFrameRead > 0 { pcm.send(ten, buf) sentFrames++ - slog.Info(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead), logTag) + ten.LogInfo(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead)) } r.Close() - slog.Info(fmt.Sprintf("send pcm data finished, text: [%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms", - msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds()), logTag) + ten.LogInfo(fmt.Sprintf("send pcm data finished, text: [%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms", + msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds())) } }() @@ -271,13 +268,13 @@ func (e 
*elevenlabsTTSExtension) OnCmd( ) { cmdName, err := cmd.GetName() if err != nil { - slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("OnCmd get name failed, err: %v", err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } - slog.Info(fmt.Sprintf("OnCmd %s", cmdInFlush), logTag) + tenEnv.LogInfo(fmt.Sprintf("OnCmd %s", cmdInFlush)) switch cmdName { case cmdInFlush: @@ -286,24 +283,24 @@ func (e *elevenlabsTTSExtension) OnCmd( // send out outCmd, err := ten.NewCmd(cmdOutFlush) if err != nil { - slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) + tenEnv.LogError(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } if err := tenEnv.SendCmd(outCmd, nil); err != nil { - slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) + tenEnv.LogError(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } else { - slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) + tenEnv.LogInfo(fmt.Sprintf("cmd %s sent", cmdOutFlush)) } } cmdResult, _ := ten.NewCmdResult(ten.StatusCodeOk) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) } // OnData receives data from ten graph. @@ -317,16 +314,16 @@ func (e *elevenlabsTTSExtension) OnData( ) { text, err := data.GetPropertyString(dataInTextDataPropertyText) if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err)) return } if len(text) == 0 { - slog.Debug("OnData text is empty, ignored", logTag) + tenEnv.LogDebug("OnData text is empty, ignored") return } - slog.Info(fmt.Sprintf("OnData input text: [%s]", text), logTag) + tenEnv.LogInfo(fmt.Sprintf("OnData input text: [%s]", text)) go func() { textChan <- &message{text: text, receivedTs: time.Now().UnixMicro()} @@ -334,8 +331,6 @@ func (e *elevenlabsTTSExtension) OnData( } func init() { - slog.Info("elevenlabs_tts extension init", logTag) - // Register addon ten.RegisterAddonAsExtension( "elevenlabs_tts", diff --git a/agents/ten_packages/extension/elevenlabs_tts/manifest.json b/agents/ten_packages/extension/elevenlabs_tts/manifest.json index 84cc43ae..0e0a25d9 100644 --- a/agents/ten_packages/extension/elevenlabs_tts/manifest.json +++ b/agents/ten_packages/extension/elevenlabs_tts/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "elevenlabs_tts", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_go", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/elevenlabs_tts/pcm.go b/agents/ten_packages/extension/elevenlabs_tts/pcm.go index f5e248a3..a13433c0 100644 --- a/agents/ten_packages/extension/elevenlabs_tts/pcm.go +++ b/agents/ten_packages/extension/elevenlabs_tts/pcm.go @@ -12,7 +12,6 @@ package extension import ( "fmt" - "log/slog" "ten_framework/ten" ) @@ -49,10 +48,10 @@ func newPcm(config *pcmConfig) *pcm { } } -func (p *pcm) getPcmFrame(buf []byte) (pcmFrame ten.AudioFrame, err error) { +func (p *pcm) 
getPcmFrame(tenEnv ten.TenEnv, buf []byte) (pcmFrame ten.AudioFrame, err error) { pcmFrame, err = ten.NewAudioFrame(p.config.Name) if err != nil { - slog.Error(fmt.Sprintf("NewPcmFrame failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("NewPcmFrame failed, err: %v", err)) return } @@ -68,7 +67,7 @@ func (p *pcm) getPcmFrame(buf []byte) (pcmFrame ten.AudioFrame, err error) { borrowedBuf, err := pcmFrame.LockBuf() if err != nil { - slog.Error(fmt.Sprintf("LockBuf failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("LockBuf failed, err: %v", err)) return } @@ -88,15 +87,15 @@ func (p *pcm) newBuf() []byte { } func (p *pcm) send(tenEnv ten.TenEnv, buf []byte) (err error) { - pcmFrame, err := p.getPcmFrame(buf) + pcmFrame, err := p.getPcmFrame(tenEnv, buf) if err != nil { - slog.Error(fmt.Sprintf("getPcmFrame failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("getPcmFrame failed, err: %v", err)) return } // send pcm - if err = tenEnv.SendAudioFrame(pcmFrame); err != nil { - slog.Error(fmt.Sprintf("SendPcmFrame failed, err: %v", err), logTag) + if err = tenEnv.SendAudioFrame(pcmFrame, nil); err != nil { + tenEnv.LogError(fmt.Sprintf("SendPcmFrame failed, err: %v", err)) return } diff --git a/agents/ten_packages/extension/elevenlabs_tts_python/elevenlabs_tts.py b/agents/ten_packages/extension/elevenlabs_tts_python/elevenlabs_tts.py index e87761fd..afe30a13 100644 --- a/agents/ten_packages/extension/elevenlabs_tts_python/elevenlabs_tts.py +++ b/agents/ten_packages/extension/elevenlabs_tts_python/elevenlabs_tts.py @@ -7,10 +7,7 @@ # from dataclasses import dataclass -from typing import AsyncIterator, Iterator -from elevenlabs import Voice, VoiceSettings -from elevenlabs.client import AsyncElevenLabs - +from typing import AsyncIterator from ten_ai_base.config import BaseConfig @@ -26,12 +23,22 @@ class ElevenLabsTTSConfig(BaseConfig): style: float = 0.0 voice_id: str = "pNInz6obpgDQGcFmaJgB" + class ElevenLabsTTS: def __init__(self, config: ElevenLabsTTSConfig) -> None: self.config = config - self.client = AsyncElevenLabs(api_key=config.api_key, timeout=config.request_timeout_seconds) + self.client = None def text_to_speech_stream(self, text: str) -> AsyncIterator[bytes]: + # to avoid circular import issue when using openai with 11labs + from elevenlabs.client import AsyncElevenLabs + from elevenlabs import Voice, VoiceSettings + + if not self.client: + self.client = AsyncElevenLabs( + api_key=self.config.api_key, timeout=self.config.request_timeout_seconds + ) + return self.client.generate( text=text, model=self.config.model_id, @@ -47,4 +54,4 @@ def text_to_speech_stream(self, text: str) -> AsyncIterator[bytes]: speaker_boost=self.config.speaker_boost, ), ), - ) \ No newline at end of file + ) diff --git a/agents/ten_packages/extension/elevenlabs_tts_python/extension.py b/agents/ten_packages/extension/elevenlabs_tts_python/extension.py index 8e9f924b..9afcb386 100644 --- a/agents/ten_packages/extension/elevenlabs_tts_python/extension.py +++ b/agents/ten_packages/extension/elevenlabs_tts_python/extension.py @@ -4,14 +4,13 @@ # See the LICENSE file for more information. 
# import traceback - -from ten_ai_base.helper import PCMWriter from .elevenlabs_tts import ElevenLabsTTS, ElevenLabsTTSConfig from ten import ( AsyncTenEnv, ) from ten_ai_base.tts import AsyncTTSBaseExtension + class ElevenLabsTTSExtension(AsyncTTSBaseExtension): def __init__(self, name: str) -> None: super().__init__(name) @@ -26,13 +25,13 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: try: await super().on_start(ten_env) ten_env.log_debug("on_start") - self.config = ElevenLabsTTSConfig.create(ten_env=ten_env) + self.config = await ElevenLabsTTSConfig.create_async(ten_env=ten_env) if not self.config.api_key: raise ValueError("api_key is required") self.client = ElevenLabsTTS(self.config) - except Exception as err: + except Exception: ten_env.log_error(f"on_start failed: {traceback.format_exc()}") async def on_stop(self, ten_env: AsyncTenEnv) -> None: @@ -43,12 +42,14 @@ async def on_deinit(self, ten_env: AsyncTenEnv) -> None: await super().on_deinit(ten_env) ten_env.log_debug("on_deinit") - async def on_request_tts(self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool) -> None: + async def on_request_tts( + self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool + ) -> None: audio_stream = await self.client.text_to_speech_stream(input_text) ten_env.log_info(f"on_request_tts: {input_text}") async for audio_data in audio_stream: - self.send_audio_out(ten_env, audio_data) + await self.send_audio_out(ten_env, audio_data) ten_env.log_info(f"on_request_tts: {input_text} done") async def on_cancel_tts(self, ten_env: AsyncTenEnv) -> None: - return await super().on_cancel_tts(ten_env) \ No newline at end of file + return await super().on_cancel_tts(ten_env) diff --git a/agents/ten_packages/extension/elevenlabs_tts_python/manifest.json b/agents/ten_packages/extension/elevenlabs_tts_python/manifest.json index 2a24022c..b551367c 100644 --- a/agents/ten_packages/extension/elevenlabs_tts_python/manifest.json +++ b/agents/ten_packages/extension/elevenlabs_tts_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "elevenlabs_tts_python", - "version": "0.4.2", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4.2" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/elevenlabs_tts_python/requirements.txt b/agents/ten_packages/extension/elevenlabs_tts_python/requirements.txt index 7680389e..baecca8f 100644 --- a/agents/ten_packages/extension/elevenlabs_tts_python/requirements.txt +++ b/agents/ten_packages/extension/elevenlabs_tts_python/requirements.txt @@ -1 +1 @@ -elevenlabs>=1.13.0 \ No newline at end of file +elevenlabs>=1.50.0 \ No newline at end of file diff --git a/agents/ten_packages/extension/fashionai/BUILD.gn b/agents/ten_packages/extension/fashionai/BUILD.gn index 6a6765bb..4dd55d40 100644 --- a/agents/ten_packages/extension/fashionai/BUILD.gn +++ b/agents/ten_packages/extension/fashionai/BUILD.gn @@ -17,6 +17,5 @@ ten_package("fashionai") { "src/__init__.py", "src/addon.py", "src/extension.py", - "src/log.py", ] } diff --git a/agents/ten_packages/extension/fashionai/__init__.py b/agents/ten_packages/extension/fashionai/__init__.py index f06f1641..645dc801 100644 --- a/agents/ten_packages/extension/fashionai/__init__.py +++ b/agents/ten_packages/extension/fashionai/__init__.py @@ -6,6 +6,3 @@ # # from .src import addon -from .src.log import logger - -logger.info("fashionai extension loaded") diff --git a/agents/ten_packages/extension/fashionai/manifest.json 
diff --git a/agents/ten_packages/extension/fashionai/manifest.json b/agents/ten_packages/extension/fashionai/manifest.json
index eddada40..33f6ff95 100644
--- a/agents/ten_packages/extension/fashionai/manifest.json
+++ b/agents/ten_packages/extension/fashionai/manifest.json
@@ -6,7 +6,7 @@
     {
       "type": "system",
       "name": "ten_runtime_python",
-      "version": "0.4"
+      "version": "0.6"
     }
   ],
   "package": {
diff --git a/agents/ten_packages/extension/fashionai/src/addon.py b/agents/ten_packages/extension/fashionai/src/addon.py
index d638453a..2200059e 100644
--- a/agents/ten_packages/extension/fashionai/src/addon.py
+++ b/agents/ten_packages/extension/fashionai/src/addon.py
@@ -17,6 +17,5 @@ class FashionAIExtensionAddon(Addon):
     def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None:
         from .extension import FashionAIExtension
-        from .log import logger
 
-        logger.info("FashionAIExtensionAddon on_create_instance")
+        ten_env.log_info("FashionAIExtensionAddon on_create_instance")
         ten_env.on_create_instance_done(FashionAIExtension(name), context)
diff --git a/agents/ten_packages/extension/fashionai/src/extension.py b/agents/ten_packages/extension/fashionai/src/extension.py
index 87bb39b6..90b05804 100644
--- a/agents/ten_packages/extension/fashionai/src/extension.py
+++ b/agents/ten_packages/extension/fashionai/src/extension.py
@@ -9,7 +9,6 @@
 from ten import (
     AudioFrame,
     VideoFrame,
-    Extension,
     AsyncTenEnv,
     Cmd,
     StatusCode,
@@ -18,116 +17,128 @@
 )
 from ten.async_extension import AsyncExtension
-from .log import logger
 import asyncio
+import traceback
 from .fashionai_client import FashionAIClient
-import threading
 from datetime import datetime
 
+
 class FashionAIExtension(AsyncExtension):
-    app_id = ""
-    token = ""
-    channel = ""
-    stream_id = 0
-    service_id = "agora"
+
+    def __init__(self, name: str):
+        super().__init__(name)
+
+        self.app_id = ""
+        self.token = ""
+        self.channel = ""
+        self.stream_id = 0
+        self.service_id = "agora"
+        self.stopped = False
+        self.queue = None
+        self.client = None
+        self.outdate_ts = datetime.now()
 
     async def on_init(self, ten_env: AsyncTenEnv) -> None:
-        logger.info("FASHION_AI on_init *********************************************************")
+        ten_env.log_info(
+            "FASHION_AI on_init *********************************************************"
+        )
         self.stopped = False
         self.queue = asyncio.Queue(maxsize=3000)
 
     async def on_start(self, ten_env: AsyncTenEnv) -> None:
-        logger.info("FASHION_AI on_start *********************************************************")
+        ten_env.log_info(
+            "FASHION_AI on_start *********************************************************"
+        )
 
-        # TODO: read properties, initialize resources
         try:
-            self.app_id = ten_env.get_property_string("app_id")
-            self.token = ten_env.get_property_string("token")
-            self.channel = ten_env.get_property_string("channel")
-            self.stream_id = str(ten_env.get_property_int("stream_id"))
-            self.service_id = ten_env.get_property_string("service_id")
-
-            logger.info(f"FASHION_AI on_start: app_id = {self.app_id}, token = {self.token}, channel = {self.channel}, stream_id = {self.stream_id}, service_id = {self.service_id}")
+            self.app_id = await ten_env.get_property_string("app_id")
+            self.token = await ten_env.get_property_string("token")
+            self.channel = await ten_env.get_property_string("channel")
+            self.stream_id = str(await ten_env.get_property_int("stream_id"))
+            self.service_id = await ten_env.get_property_string("service_id")
+
+            ten_env.log_info(
+                f"FASHION_AI on_start: app_id = {self.app_id}, token = {self.token}, channel = {self.channel}, stream_id = {self.stream_id}, service_id = {self.service_id}"
+            )
         except Exception as e:
-            logger.warning(f"get_property err: {e}")
+            ten_env.log_warn(f"get_property err: {e}")
 
         if len(self.token) > 0:
             self.app_id = self.token
-        self.client = FashionAIClient("wss://ingress.service.fasionai.com/websocket/node5/agoramultimodel2", self.service_id)
-        asyncio.create_task(self.process_input_text())
+        self.client = FashionAIClient(
+            ten_env,
+            "wss://ingress.service.fasionai.com/websocket/node5/agoramultimodel2",
+            self.service_id,
+        )
+        asyncio.create_task(self.process_input_text(ten_env))
         await self.init_fashionai(self.app_id, self.channel, self.stream_id)
 
     async def on_stop(self, ten_env: AsyncTenEnv) -> None:
-        logger.info("FASHION_AI on_stop")
+        ten_env.log_info("FASHION_AI on_stop")
 
         self.stopped = True
         await self.queue.put(None)
 
     async def on_deinit(self, ten_env: AsyncTenEnv) -> None:
-        logger.info("FASHION_AI on_deinit")
+        ten_env.log_info("FASHION_AI on_deinit")
 
     async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None:
         cmd_name = cmd.get_name()
-        logger.info("FASHION_AI on_cmd name {}".format(cmd_name))
+        ten_env.log_info(f"FASHION_AI on_cmd name {cmd_name}")
 
-        # TODO: process cmd
         if cmd_name == "flush":
             self.outdate_ts = datetime.now()
             try:
                 await self.client.send_interrupt()
-            except Exception as e:
-                logger.warning(f"flush err: {traceback.format_exc()}")
+            except Exception:
+                ten_env.log_warn(f"flush err: {traceback.format_exc()}")
 
             cmd_out = Cmd.create("flush")
             await ten_env.send_cmd(cmd_out)
-            # ten_env.send_cmd(cmd_out, lambda ten, result: logger.info("send_cmd flush done"))
+            # ten_env.send_cmd(cmd_out, lambda ten, result: ten_env.log_info("send_cmd flush done"))
         else:
-            logger.info("unknown cmd {}".format(cmd_name))
+            ten_env.log_info(f"unknown cmd {cmd_name}")
 
-        logger.info("FASHION_AI on_cmd done")
+        ten_env.log_info("FASHION_AI on_cmd done")
         cmd_result = CmdResult.create(StatusCode.OK)
-        ten_env.return_result(cmd_result, cmd)
+        await ten_env.return_result(cmd_result, cmd)
 
     async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None:
-        # TODO: process data
-        inputText = data.get_property_string("text")
-        if len(inputText) == 0:
-            logger.info("FASHION_AI ignore empty text")
+        input_text = data.get_property_string("text")
+        if len(input_text) == 0:
+            ten_env.log_info("FASHION_AI ignore empty text")
             return
 
-        logger.info("FASHION_AI on data %s", inputText)
+        ten_env.log_info(f"FASHION_AI on data {input_text}")
         try:
-            await self.queue.put(inputText)
+            await self.queue.put(input_text)
         except asyncio.TimeoutError:
-            logger.warning(f"FASHION_AI put inputText={inputText} queue timed out")
+            ten_env.log_warn(f"FASHION_AI put inputText={input_text} queue timed out")
         except Exception as e:
-            logger.warning(f"FASHION_AI put inputText={inputText} queue err: {e}")
-        logger.info("FASHION_AI send_inputText %s", inputText)
+            ten_env.log_warn(f"FASHION_AI put inputText={input_text} queue err: {e}")
+        ten_env.log_info(f"FASHION_AI send_inputText {input_text}")
 
-    async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None:
-        # TODO: process pcm frame
+    async def on_audio_frame(self, _: AsyncTenEnv, audio_frame: AudioFrame) -> None:
         pass
 
-    async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None:
-        # TODO: process image frame
-        pass
+    async def on_video_frame(self, _: AsyncTenEnv, video_frame: VideoFrame) -> None:
+        pass
 
     async def init_fashionai(self, app_id, channel, stream_id):
         await self.client.connect()
         await self.client.stream_start(app_id, channel, stream_id)
         await self.client.render_start()
-
-    async def process_input_text(self):
+
+    async def process_input_text(self, ten_env: AsyncTenEnv):
         while True:
-            inputText = await self.queue.get()
-            if inputText is None:
-                logger.info("Stopping async_polly_handler...")
+            input_text = await self.queue.get()
+            if input_text is None:
+                ten_env.log_info("Stopping async_polly_handler...")
                 break
 
-            logger.info(f"async_polly_handler: loop fashion ai polly.{inputText}")
+            ten_env.log_info(f"async_polly_handler: loop fashion ai polly.{input_text}")
 
-            if len(inputText) > 0:
+            if len(input_text) > 0:
                 try:
-                    await self.client.send_inputText(inputText)
+                    await self.client.send_inputText(input_text)
                 except Exception as e:
-                    logger.exception(e)
+                    ten_env.log_error(str(e))
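process_input_text above drains a bounded asyncio.Queue and exits when it sees the None sentinel that on_stop enqueues. The shutdown handshake in isolation, stdlib only:

import asyncio

async def worker(q: asyncio.Queue) -> None:
    while True:
        item = await q.get()
        if item is None:  # sentinel enqueued by on_stop
            print("worker stopped")
            break
        print("processing", item)

async def main() -> None:
    q: asyncio.Queue = asyncio.Queue(maxsize=3000)
    task = asyncio.create_task(worker(q))
    await q.put("hello")
    await q.put(None)  # request shutdown
    await task

asyncio.run(main())

Using a sentinel rather than task cancellation lets items already in the queue drain before the worker exits.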
logger.info(f"FASHION_AI Sent: {message}") + self.ten_env.log_info(f"FASHION_AI Sent: {message}") # response = await asyncio.wait_for(self.websocket.recv(), timeout=2) - # logger.info(f"FASHION_AI Received: {response}") + # self.ten_env.log_info(f"FASHION_AI Received: {response}") except websockets.exceptions.ConnectionClosedError as e: - logger.info(f"FASHION_AI Connection closed with error: {e}") + self.ten_env.log_info(f"FASHION_AI Connection closed with error: {e}") await self.reconnect() except asyncio.TimeoutError: - logger.info("FASHION_AI Timeout waiting for response") + self.ten_env.log_info("FASHION_AI Timeout waiting for response") else: - logger.info("FASHION_AI WebSocket is not connected.") + self.ten_env.log_info("FASHION_AI WebSocket is not connected.") async def close(self): if self.websocket is not None: await self.websocket.close() - logger.info("FASHION_AI WebSocket connection closed.") + self.ten_env.log_info("FASHION_AI WebSocket connection closed.") else: - logger.info("FASHION_AI WebSocket is not connected.") + self.ten_env.log_info("FASHION_AI WebSocket is not connected.") async def reconnect(self): - logger.info("FASHION_AI Reconnecting...") + self.ten_env.log_info("FASHION_AI Reconnecting...") await self.close() await self.connect() @@ -117,4 +118,4 @@ async def heartbeat(self, interval): try: await self.send_inputText("ping") except websockets.exceptions.ConnectionClosedError: - break \ No newline at end of file + break diff --git a/agents/ten_packages/extension/fashionai/src/log.py b/agents/ten_packages/extension/fashionai/src/log.py deleted file mode 100644 index 84b4ca42..00000000 --- a/agents/ten_packages/extension/fashionai/src/log.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# -# Agora Real Time Engagement -# Created by Wei Hu in 2024-08. -# Copyright (c) 2024 Agora IO. All rights reserved. -# -# -import logging - -logger = logging.getLogger("fashionai") -logger.setLevel(logging.INFO) - -formatter_str = ( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - " - "[%(filename)s:%(lineno)d] - %(message)s" -) -formatter = logging.Formatter(formatter_str) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/file_chunker/__init__.py b/agents/ten_packages/extension/file_chunker/__init__.py index 06d98178..ee1b1d39 100644 --- a/agents/ten_packages/extension/file_chunker/__init__.py +++ b/agents/ten_packages/extension/file_chunker/__init__.py @@ -1,4 +1 @@ from . 
diff --git a/agents/ten_packages/extension/file_chunker/__init__.py b/agents/ten_packages/extension/file_chunker/__init__.py
index 06d98178..ee1b1d39 100644
--- a/agents/ten_packages/extension/file_chunker/__init__.py
+++ b/agents/ten_packages/extension/file_chunker/__init__.py
@@ -1,4 +1 @@
 from . import file_chunker_addon
-from .log import logger
-
-logger.info("file_chunker extension loaded")
diff --git a/agents/ten_packages/extension/file_chunker/file_chunker_addon.py b/agents/ten_packages/extension/file_chunker/file_chunker_addon.py
index ac6be319..7828f1a3 100644
--- a/agents/ten_packages/extension/file_chunker/file_chunker_addon.py
+++ b/agents/ten_packages/extension/file_chunker/file_chunker_addon.py
@@ -8,7 +8,6 @@
 @register_addon_as_extension("file_chunker")
 class FileChunkerExtensionAddon(Addon):
     def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None:
-        from .log import logger
         from .file_chunker_extension import FileChunkerExtension
 
-        logger.info("on_create_instance")
+        ten.log_info("on_create_instance")
         ten.on_create_instance_done(FileChunkerExtension(addon_name), context)
diff --git a/agents/ten_packages/extension/file_chunker/file_chunker_extension.py b/agents/ten_packages/extension/file_chunker/file_chunker_extension.py
index 8676a5b2..4ff0ebf7 100644
--- a/agents/ten_packages/extension/file_chunker/file_chunker_extension.py
+++ b/agents/ten_packages/extension/file_chunker/file_chunker_extension.py
@@ -13,7 +13,6 @@
     CmdResult,
 )
 from typing import List, Any
-from .log import logger
 import json
 from datetime import datetime
 import uuid, math
@@ -23,7 +22,6 @@
 UPSERT_VECTOR_CMD = "upsert_vector"
 FILE_CHUNKED_CMD = "file_chunked"
 
-# TODO: configable
 CHUNK_SIZE = 200
 CHUNK_OVERLAP = 20
 BATCH_SIZE = 5
@@ -60,7 +58,7 @@
     def generate_collection_name(self) -> str:
         return "coll_" + uuid.uuid1().hex.lower()
 
-    def split(self, path: str) -> List[Any]:
+    def split(self, ten: TenEnv, path: str) -> List[Any]:
         # lazy import packages which requires long time to load
         from llama_index.core import SimpleDirectoryReader
         from llama_index.core.node_parser import SentenceSplitter
@@ -76,11 +74,7 @@
             chunk_overlap=CHUNK_OVERLAP,
         )
         nodes = splitter.get_nodes_from_documents(documents)
-        logger.info(
-            "file {} pages count {}, chunking count {}".format(
-                path, len(documents), len(nodes)
-            )
-        )
+        ten.log_info(f"file {path} pages count {len(documents)}, chunking count {len(nodes)}")
         return nodes
 
     def create_collection(self, ten: TenEnv, collection_name: str, wait: bool):
@@ -96,23 +90,18 @@
             wait_event.wait()
 
     def embedding(self, ten: TenEnv, path: str, texts: List[str]):
-        logger.info(
-            "generate embeddings for the file: {}, with batch size: {}".format(
-                path, len(texts)
-            )
+        ten.log_info(
+            f"generate embeddings for the file: {path}, with batch size: {len(texts)}"
         )
 
         cmd_out = Cmd.create("embed_batch")
         cmd_out.set_property_from_json("inputs", json.dumps(texts))
         ten.send_cmd(
-            cmd_out,
-            lambda ten, result: self.vector_store(
-                ten, path, texts, result
-            ),  # TODO: deal with error
+            cmd_out, lambda ten, result: self.vector_store(ten, path, texts, result)
         )
 
     def vector_store(self, ten: TenEnv, path: str, texts: List[str], result: CmdResult):
-        logger.info("vector store start for one splitting of the file {}".format(path))
+        ten.log_info(f"vector store start for one splitting of the file {path}")
         file_name = path.split("/")[-1]
         embed_output_json = result.get_property_string("embeddings")
         embed_output = json.loads(embed_output_json)
@@ -124,13 +113,13 @@
         for text, embedding in zip(texts, embeddings):
             content.append({"text": text, "embedding": embedding})
         cmd_out.set_property_string("content", json.dumps(content))
-        # logger.info(json.dumps(content))
+        # ten.log_info(json.dumps(content))
         ten.send_cmd(cmd_out, lambda ten, result: self.file_chunked(ten, path))
 
     def file_chunked(self, ten: TenEnv, path: str):
         if path in self.counters and path in self.expected:
             self.counters[path] += 1
-            logger.info(
+            ten.log_info(
                 "complete vector store for one splitting of the file: %s, current counter: %i, expected: %i",
                 path,
                 self.counters[path],
@@ -140,22 +129,19 @@
                 chunks_count = self.counters[path]
                 del self.counters[path]
                 del self.expected[path]
-                logger.info(
-                    "complete chunk for the file: {}, chunks_count {}".format(
-                        path,
-                        chunks_count,
-                    )
+                ten.log_info(
+                    f"complete chunk for the file: {path}, chunks_count {chunks_count}"
                 )
                 cmd_out = Cmd.create(FILE_CHUNKED_CMD)
                 cmd_out.set_property_string("path", path)
                 cmd_out.set_property_string("collection", self.new_collection_name)
                 ten.send_cmd(
                     cmd_out,
-                    lambda ten, result: logger.info("send_cmd done"),
+                    lambda ten, result: ten.log_info("send_cmd done"),
                 )
                 self.file_chunked_event.set()
         else:
-            logger.error("missing counter for the file path: %s", path)
+            ten.log_error(f"missing counter for the file path: {path}")
 
     def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None:
         cmd_name = cmd.get_name()
@@ -165,12 +151,12 @@
             collection = None
             try:
                 collection = cmd.get_property_string("collection")
-            except Exception as e:
-                logger.warning("missing collection property in cmd {}".format(cmd_name))
+            except Exception:
+                ten.log_warn(f"missing collection property in cmd {cmd_name}")
 
             self.queue.put((path, collection))  # make sure files are processed in order
         else:
-            logger.info("unknown cmd {}".format(cmd_name))
+            ten.log_info(f"unknown cmd {cmd_name}")
 
         cmd_result = CmdResult.create(StatusCode.OK)
         cmd_result.set_property_string("detail", "ok")
@@ -187,15 +173,15 @@
             start_time = datetime.now()
             if collection is None:
                 collection = self.generate_collection_name()
-                logger.info("collection {} generated".format(collection))
-            logger.info("start processing {}, collection {}".format(path, collection))
+                ten.log_info(f"collection {collection} generated")
+            ten.log_info(f"start processing {path}, collection {collection}")
 
             # create collection
             self.create_collection(ten, collection, True)
-            logger.info("collection {} created".format(collection))
+            ten.log_info(f"collection {collection} created")
 
             # split
-            nodes = self.split(path)
+            nodes = self.split(ten, path)
 
             # reset counters and events
             self.new_collection_name = collection
@@ -210,16 +196,12 @@
             # wait for all chunks to be processed
             self.file_chunked_event.wait()
 
-            logger.info(
-                "finished processing {}, collection {}, cost {}ms".format(
-                    path,
-                    collection,
-                    int((datetime.now() - start_time).total_seconds() * 1000),
-                )
+            ten.log_info(
+                f"finished processing {path}, collection {collection}, cost {int((datetime.now() - start_time).total_seconds() * 1000)}ms"
            )
 
     def on_start(self, ten: TenEnv) -> None:
-        logger.info("on_start")
+        ten.log_info("on_start")
         self.stop = False
 
         self.thread = threading.Thread(target=self.async_handler, args=[ten])
@@ -228,7 +210,7 @@
         ten.on_start_done()
 
     def on_stop(self, ten: TenEnv) -> None:
-        logger.info("on_stop")
+        ten.log_info("on_stop")
         self.stop = True
 
         if self.thread is not None:
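file_chunker tracks completion with per-file counters plus a threading.Event: each stored batch bumps the counter, and the last one fires the event that async_handler blocks on. The bookkeeping in isolation, stdlib only:

import threading

counters: dict = {}
expected: dict = {}
done = threading.Event()

def start(path: str, batches: int) -> None:
    counters[path] = 0
    expected[path] = batches
    done.clear()

def on_batch_stored(path: str) -> None:
    # Called once per stored batch; the final call releases the waiter.
    counters[path] += 1
    if counters[path] == expected[path]:
        del counters[path], expected[path]
        done.set()  # async_handler's done.wait() returns here

start("a.pdf", batches=3)
for _ in range(3):
    on_batch_stored("a.pdf")
assert done.is_set()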
diff --git a/agents/ten_packages/extension/file_chunker/log.py b/agents/ten_packages/extension/file_chunker/log.py
deleted file mode 100644
index d9cb27d3..00000000
--- a/agents/ten_packages/extension/file_chunker/log.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import logging
-
-logger = logging.getLogger("file_chunker")
-logger.setLevel(logging.INFO)
-
-formatter = logging.Formatter(
-    "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s"
-)
-
-console_handler = logging.StreamHandler()
-console_handler.setFormatter(formatter)
-
-logger.addHandler(console_handler)
diff --git a/agents/ten_packages/extension/file_chunker/manifest.json b/agents/ten_packages/extension/file_chunker/manifest.json
index 460c4382..00370056 100644
--- a/agents/ten_packages/extension/file_chunker/manifest.json
+++ b/agents/ten_packages/extension/file_chunker/manifest.json
@@ -1,12 +1,12 @@
 {
   "type": "extension",
   "name": "file_chunker",
-  "version": "0.4.0",
+  "version": "0.1.0",
   "dependencies": [
     {
       "type": "system",
       "name": "ten_runtime_python",
-      "version": "0.4"
+      "version": "0.6"
     }
   ],
   "api": {
diff --git a/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts.go b/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts.go
index 6ef40e08..f3449df1 100644
--- a/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts.go
+++ b/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts.go
@@ -12,8 +12,8 @@ import (
 	"bytes"
 	"fmt"
 	"io"
-	"log/slog"
 	"net/http"
+	"ten_framework/ten"
 	"time"
 
 	"github.com/vmihailenco/msgpack/v5"
@@ -56,7 +56,7 @@ func newFishAudioTTS(config fishAudioTTSConfig) (*fishAudioTTS, error) {
 	}, nil
 }
 
-func (e *fishAudioTTS) textToSpeechStream(streamWriter io.Writer, text string) (err error) {
+func (e *fishAudioTTS) textToSpeechStream(tenEnv ten.TenEnv, streamWriter io.Writer, text string) (err error) {
 	latency := "normal"
 	if e.config.OptimizeStreamingLatency {
 		latency = "balanced"
@@ -101,7 +101,7 @@
 	// Check the response status code
 	if resp.StatusCode != http.StatusOK {
-		slog.Error("Unexpected response status", "status", resp.StatusCode)
+		tenEnv.LogError(fmt.Sprintf("Unexpected response status, status: %d", resp.StatusCode))
 		return fmt.Errorf("unexpected response status: %d", resp.StatusCode)
 	}
@@ -110,7 +110,7 @@
 	for {
 		n, err := resp.Body.Read(buffer)
 		if err != nil && err != io.EOF {
-			slog.Error("Failed to read from response body", "error", err)
+			tenEnv.LogError(fmt.Sprintf("Failed to read from response body, error: %s", err))
 			return fmt.Errorf("failed to read from response body: %w", err)
 		}
 		if n == 0 {
@@ -119,7 +119,7 @@
 		_, writeErr := streamWriter.Write(buffer[:n])
 		if writeErr != nil {
-			slog.Error("Failed to write to streamWriter", "error", writeErr)
+			tenEnv.LogError(fmt.Sprintf("Failed to write to streamWriter, error: %s", writeErr))
 			return fmt.Errorf("failed to write to streamWriter: %w", writeErr)
 		}
 	}
diff --git a/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts_extension.go b/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts_extension.go
index 3b017e17..7d2d5f4d 100644
--- a/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts_extension.go
+++ b/agents/ten_packages/extension/fish_audio_tts/fish_audio_tts_extension.go
@@ -11,7 +11,6 @@ package extension
 import (
 	"fmt"
 	"io"
-	"log/slog"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -36,8 +35,6 @@ const (
 )
 
 var (
-	logTag = slog.String("extension", "FISH_AUDIO_TTS_EXTENSION")
-
 	outdateTs atomic.Int64
 	textChan  chan *message
 	wg        sync.WaitGroup
@@ -66,20 +63,20 @@ func newFishAudioTTSExtension(name string) ten.Extension {
 // - request_timeout_seconds
 // - base_url
 func (e *fishAudioTTSExtension) OnStart(ten ten.TenEnv) {
-	slog.Info("OnStart", logTag)
+	ten.LogInfo("OnStart")
 
 	// prepare configuration
 	fishAudioTTSConfig := defaultFishAudioTTSConfig()
 
 	if apiKey, err := ten.GetPropertyString(propertyApiKey); err != nil {
-		slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag)
+		ten.LogError(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err))
 		return
 	} else {
 		fishAudioTTSConfig.ApiKey = apiKey
 	}
 
 	if modelId, err := ten.GetPropertyString(propertyModelId); err != nil {
-		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModelId, err), logTag)
+		ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModelId, err))
 	} else {
 		if len(modelId) > 0 {
 			fishAudioTTSConfig.ModelId = modelId
@@ -87,13 +84,13 @@
 	}
 
 	if optimizeStreamingLatency, err := ten.GetPropertyBool(propertyOptimizeStreamingLatency); err != nil {
-		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyOptimizeStreamingLatency, err), logTag)
+		ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyOptimizeStreamingLatency, err))
 	} else {
 		fishAudioTTSConfig.OptimizeStreamingLatency = optimizeStreamingLatency
 	}
 
 	if requestTimeoutSeconds, err := ten.GetPropertyInt64(propertyRequestTimeoutSeconds); err != nil {
-		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err), logTag)
+		ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err))
 	} else {
 		if requestTimeoutSeconds > 0 {
 			fishAudioTTSConfig.RequestTimeoutSeconds = int(requestTimeoutSeconds)
@@ -101,7 +98,7 @@
 	}
 
 	if baseUrl, err := ten.GetPropertyString(propertyBaseUrl); err != nil {
-		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyBaseUrl, err), logTag)
+		ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyBaseUrl, err))
 	} else {
 		if len(baseUrl) > 0 {
 			fishAudioTTSConfig.BaseUrl = baseUrl
@@ -111,12 +108,12 @@
 	// create fishAudioTTS instance
 	fishAudioTTS, err := newFishAudioTTS(fishAudioTTSConfig)
 	if err != nil {
-		slog.Error(fmt.Sprintf("newFishAudioTTS failed, err: %v", err), logTag)
+		ten.LogError(fmt.Sprintf("newFishAudioTTS failed, err: %v", err))
 		return
 	}
 
-	slog.Info(fmt.Sprintf("newFishAudioTTS succeed with ModelId: %s",
-		fishAudioTTSConfig.ModelId), logTag)
+	ten.LogInfo(fmt.Sprintf("newFishAudioTTS succeed with ModelId: %s",
+		fishAudioTTSConfig.ModelId))
 
 	// set fishAudio instance
 	e.fishAudioTTS = fishAudioTTS
@@ -129,17 +126,17 @@
 	textChan = make(chan *message, textChanMax)
 
 	go func() {
-		slog.Info("process textChan", logTag)
+		ten.LogInfo("process textChan")
 
 		for msg := range textChan {
 			if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt
-				slog.Info(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d",
-					msg.text, msg.receivedTs, outdateTs.Load()), logTag)
+				ten.LogInfo(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d",
+					msg.text, msg.receivedTs, outdateTs.Load()))
 				continue
 			}
 
 			wg.Add(1)
-			slog.Info(fmt.Sprintf("textChan text: [%s]", msg.text), logTag)
+			ten.LogInfo(fmt.Sprintf("textChan text: [%s]", msg.text))
 
 			r, w := io.Pipe()
 			startTime := time.Now()
@@ -148,16 +145,16 @@
 				defer wg.Done()
 				defer w.Close()
 
-				slog.Info(fmt.Sprintf("textToSpeechStream text: [%s]", msg.text), logTag)
-				err = e.fishAudioTTS.textToSpeechStream(w, msg.text)
-				slog.Info(fmt.Sprintf("textToSpeechStream result: [%v]", err), logTag)
+				ten.LogInfo(fmt.Sprintf("textToSpeechStream text: [%s]", msg.text))
+				err = e.fishAudioTTS.textToSpeechStream(ten, w, msg.text)
+				ten.LogInfo(fmt.Sprintf("textToSpeechStream result: [%v]", err))
 				if err != nil {
-					slog.Error(fmt.Sprintf("textToSpeechStream failed, err: %v", err), logTag)
+					ten.LogError(fmt.Sprintf("textToSpeechStream failed, err: %v", err))
 					return
 				}
 			}()
 
-			slog.Info(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize), logTag)
+			ten.LogInfo(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize))
 
 			var (
 				firstFrameLatency int64
@@ -171,8 +168,8 @@
 			// read pcm stream
 			for {
 				if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt
-					slog.Info(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d",
-						msg.text, msg.receivedTs, outdateTs.Load()), logTag)
+					ten.LogInfo(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d",
+						msg.text, msg.receivedTs, outdateTs.Load()))
 					break
 				}
@@ -182,16 +179,16 @@
 				if err != nil {
 					if err == io.EOF {
-						slog.Info("read pcm stream EOF", logTag)
+						ten.LogInfo("read pcm stream EOF")
 						break
 					}
-					slog.Error(fmt.Sprintf("read pcm stream failed, err: %v", err), logTag)
+					ten.LogError(fmt.Sprintf("read pcm stream failed, err: %v", err))
 					break
 				}
 
 				if pcmFrameRead != pcmFrameSize {
-					slog.Debug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead), logTag)
+					ten.LogDebug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead))
 					continue
 				}
@@ -203,21 +200,21 @@
 				if firstFrameLatency == 0 {
 					firstFrameLatency = time.Since(startTime).Milliseconds()
-					slog.Info(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency), logTag)
+					ten.LogInfo(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency))
 				}
 
-				slog.Debug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text), logTag)
+				ten.LogDebug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text))
 			}
 
 			if pcmFrameRead > 0 {
 				pcm.send(ten, buf)
 				sentFrames++
-				slog.Info(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead), logTag)
+				ten.LogInfo(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead))
 			}
 
 			r.Close()
-			slog.Info(fmt.Sprintf("send pcm data finished, text: [%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms",
-				msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds()), logTag)
+			ten.LogInfo(fmt.Sprintf("send pcm data finished, text: [%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms",
+				msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds()))
 		}
 	}()
 
@@ -235,13 +232,13 @@ func (e *fishAudioTTSExtension) OnCmd(
 ) {
 	cmdName, err := cmd.GetName()
 	if err != nil {
-		slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag)
+		tenEnv.LogError(fmt.Sprintf("OnCmd get name failed, err: %v", err))
 		cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError)
-		tenEnv.ReturnResult(cmdResult, cmd)
+		tenEnv.ReturnResult(cmdResult, cmd, nil)
 		return
 	}
 
-	slog.Info(fmt.Sprintf("OnCmd %s", cmdInFlush), logTag)
+	tenEnv.LogInfo(fmt.Sprintf("OnCmd %s", cmdInFlush))
 
 	switch cmdName {
 	case cmdInFlush:
@@ -250,24 +247,24 @@
 		// send out
 		outCmd, err := ten.NewCmd(cmdOutFlush)
 		if err != nil {
-			slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag)
+			tenEnv.LogError(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err))
 			cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError)
-			tenEnv.ReturnResult(cmdResult, cmd)
+			tenEnv.ReturnResult(cmdResult, cmd, nil)
 			return
 		}
 
 		if err := tenEnv.SendCmd(outCmd, nil); err != nil {
-			slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag)
+			tenEnv.LogError(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err))
 			cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError)
-			tenEnv.ReturnResult(cmdResult, cmd)
+			tenEnv.ReturnResult(cmdResult, cmd, nil)
 			return
 		} else {
-			slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag)
+			tenEnv.LogInfo(fmt.Sprintf("cmd %s sent", cmdOutFlush))
 		}
 	}
 
 	cmdResult, _ := ten.NewCmdResult(ten.StatusCodeOk)
-	tenEnv.ReturnResult(cmdResult, cmd)
+	tenEnv.ReturnResult(cmdResult, cmd, nil)
 }
 
 // OnData receives data from ten graph.
@@ -281,16 +278,16 @@
 ) {
 	text, err := data.GetPropertyString(dataInTextDataPropertyText)
 	if err != nil {
-		slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag)
+		tenEnv.LogWarn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err))
 		return
 	}
 
 	if len(text) == 0 {
-		slog.Debug("OnData text is empty, ignored", logTag)
+		tenEnv.LogDebug("OnData text is empty, ignored")
 		return
 	}
 
-	slog.Info(fmt.Sprintf("OnData input text: [%s]", text), logTag)
+	tenEnv.LogInfo(fmt.Sprintf("OnData input text: [%s]", text))
 
 	go func() {
 		textChan <- &message{text: text, receivedTs: time.Now().UnixMicro()}
@@ -298,8 +295,6 @@
 }
 
 func init() {
-	slog.Info("fish_audio_tts extension init", logTag)
-
 	// Register addon
 	ten.RegisterAddonAsExtension(
 		"fish_audio_tts",
diff --git a/agents/ten_packages/extension/fish_audio_tts/manifest.json b/agents/ten_packages/extension/fish_audio_tts/manifest.json
index b5befbf4..e0b76927 100644
--- a/agents/ten_packages/extension/fish_audio_tts/manifest.json
+++ b/agents/ten_packages/extension/fish_audio_tts/manifest.json
@@ -1,12 +1,12 @@
 {
   "type": "extension",
   "name": "fish_audio_tts",
-  "version": "0.4.0",
+  "version": "0.1.0",
   "dependencies": [
     {
       "type": "system",
       "name": "ten_runtime_go",
-      "version": "0.4"
+      "version": "0.6"
     }
   ],
   "api": {
diff --git a/agents/ten_packages/extension/fish_audio_tts/pcm.go b/agents/ten_packages/extension/fish_audio_tts/pcm.go
index f78efdde..3d1d788c 100644
--- a/agents/ten_packages/extension/fish_audio_tts/pcm.go
+++ b/agents/ten_packages/extension/fish_audio_tts/pcm.go
@@ -10,7 +10,6 @@ package extension
 import (
 	"fmt"
-	"log/slog"
 
 	"ten_framework/ten"
 )
@@ -47,10 +46,10 @@ func newPcm(config *pcmConfig) *pcm {
 	}
 }
 
-func (p *pcm) getPcmFrame(buf []byte) (pcmFrame ten.AudioFrame, err error) {
+func (p *pcm) getPcmFrame(tenEnv ten.TenEnv, buf []byte) (pcmFrame ten.AudioFrame, err error) {
 	pcmFrame, err = ten.NewAudioFrame(p.config.Name)
 	if err != nil {
-		slog.Error(fmt.Sprintf("NewPcmFrame failed, err: %v", err), logTag)
+		tenEnv.LogError(fmt.Sprintf("NewPcmFrame failed, err: %v", err))
 		return
 	}
@@ -66,7 +65,7 @@
 	borrowedBuf, err := pcmFrame.LockBuf()
 	if err != nil {
-		slog.Error(fmt.Sprintf("LockBuf failed, err: %v", err), logTag)
+		tenEnv.LogError(fmt.Sprintf("LockBuf failed, err: %v", err))
 		return
 	}
@@ -86,15 +85,15 @@
 }
 
 func (p *pcm) send(tenEnv ten.TenEnv, buf []byte) (err error) {
-	pcmFrame, err := p.getPcmFrame(buf)
+	pcmFrame, err := p.getPcmFrame(tenEnv, buf)
 	if err != nil {
-		slog.Error(fmt.Sprintf("getPcmFrame failed, err: %v", err), logTag)
+		tenEnv.LogError(fmt.Sprintf("getPcmFrame failed, err: %v", err))
 		return
 	}
 
 	// send pcm
-	if err = tenEnv.SendAudioFrame(pcmFrame); err != nil {
-		slog.Error(fmt.Sprintf("SendPcmFrame failed, err: %v", err), logTag)
+	if err = tenEnv.SendAudioFrame(pcmFrame, nil); err != nil {
+		tenEnv.LogError(fmt.Sprintf("SendPcmFrame failed, err: %v", err))
 		return
 	}
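Both audio paths in this patch care about frame alignment: pcm.go reads fixed pcmFrameSize blocks, and the gemini_v2v send_audio_out further below carries an unaligned tail in leftover_bytes. A tiny runnable sketch of that carry logic under the same assumptions (16-bit mono PCM):

BYTES_PER_SAMPLE = 2
CHANNELS = 1

leftover = b""

def frame_align(chunk: bytes) -> bytes:
    """Return the longest frame-aligned prefix of leftover+chunk; keep the rest."""
    global leftover
    data = leftover + chunk
    frame = BYTES_PER_SAMPLE * CHANNELS
    valid = len(data) - (len(data) % frame)
    leftover = data[valid:]
    return data[:valid]

assert frame_align(b"\x01\x02\x03") == b"\x01\x02"  # one byte carried over
assert frame_align(b"\x04") == b"\x03\x04"          # carried byte is prepended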
diff --git a/agents/ten_packages/extension/gemini_llm_python/__init__.py b/agents/ten_packages/extension/gemini_llm_python/__init__.py
index d56c02f0..276699b3 100644
--- a/agents/ten_packages/extension/gemini_llm_python/__init__.py
+++ b/agents/ten_packages/extension/gemini_llm_python/__init__.py
@@ -1,6 +1 @@
 from . import gemini_llm_addon
-from .extension import EXTENSION_NAME
-from .log import logger
-
-
-logger.info(f"{EXTENSION_NAME} extension loaded")
diff --git a/agents/ten_packages/extension/gemini_llm_python/extension.py b/agents/ten_packages/extension/gemini_llm_python/extension.py
deleted file mode 100644
index ba035e5d..00000000
--- a/agents/ten_packages/extension/gemini_llm_python/extension.py
+++ /dev/null
@@ -1 +0,0 @@
-EXTENSION_NAME = "gemini_llm_python"
diff --git a/agents/ten_packages/extension/gemini_llm_python/gemini_llm.py b/agents/ten_packages/extension/gemini_llm_python/gemini_llm.py
index f11317b5..92031d70 100644
--- a/agents/ten_packages/extension/gemini_llm_python/gemini_llm.py
+++ b/agents/ten_packages/extension/gemini_llm_python/gemini_llm.py
@@ -3,14 +3,16 @@
 
 class GeminiLLMConfig:
-    def __init__(self,
-                 api_key: str,
-                 max_output_tokens: int,
-                 model: str,
-                 prompt: str,
-                 temperature: float,
-                 top_k: int,
-                 top_p: float):
+    def __init__(
+        self,
+        api_key: str,
+        max_output_tokens: int,
+        model: str,
+        prompt: str,
+        temperature: float,
+        top_k: int,
+        top_p: float,
+    ):
         self.api_key = api_key
         self.max_output_tokens = max_output_tokens
         self.model = model
@@ -36,19 +38,24 @@ class GeminiLLM:
     def __init__(self, config: GeminiLLMConfig):
         self.config = config
         genai.configure(api_key=self.config.api_key)
-        self.model = genai.GenerativeModel(model_name=self.config.model, system_instruction=self.config.prompt)
+        self.model = genai.GenerativeModel(
+            model_name=self.config.model, system_instruction=self.config.prompt
+        )
 
     def get_chat_completions_stream(self, messages: List[Dict[str, str]]):
         try:
             chat = self.model.start_chat(history=messages[0:-1])
-            response = chat.send_message(messages[-1].get("parts"),
-                                         generation_config=genai.types.GenerationConfig(
-                                             max_output_tokens=self.config.max_output_tokens,
-                                             temperature=self.config.temperature,
-                                             top_k=self.config.top_k,
-                                             top_p=self.config.top_p),
-                                         stream=True)
+            response = chat.send_message(
+                messages[-1].get("parts"),
+                generation_config=genai.types.GenerationConfig(
+                    max_output_tokens=self.config.max_output_tokens,
+                    temperature=self.config.temperature,
+                    top_k=self.config.top_k,
+                    top_p=self.config.top_p,
+                ),
+                stream=True,
+            )
             return response
         except Exception as e:
-            raise Exception(f"get_chat_completions_stream failed, err: {e}")
+            raise RuntimeError(f"get_chat_completions_stream failed, err: {e}") from e
diff --git a/agents/ten_packages/extension/gemini_llm_python/gemini_llm_addon.py b/agents/ten_packages/extension/gemini_llm_python/gemini_llm_addon.py
index 3e07752a..ea27df91 100644
--- a/agents/ten_packages/extension/gemini_llm_python/gemini_llm_addon.py
+++ b/agents/ten_packages/extension/gemini_llm_python/gemini_llm_addon.py
@@ -10,14 +10,10 @@
     register_addon_as_extension,
     TenEnv,
 )
-from .extension import EXTENSION_NAME
-
 
-@register_addon_as_extension(EXTENSION_NAME)
+@register_addon_as_extension("gemini_llm_python")
 class GeminiLLMExtensionAddon(Addon):
     def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None:
-        from .log import logger
         from .gemini_llm_extension import GeminiLLMExtension
 
-        logger.info("on_create_instance")
-
+        ten.log_info("on_create_instance")
         ten.on_create_instance_done(GeminiLLMExtension(addon_name), context)
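For reference, the streaming call shape that gemini_llm.py wraps, as a standalone sketch; the API key, model name, and history are placeholders, and the call signatures mirror what the diff itself uses from the google-generativeai SDK:

import google.generativeai as genai  # the `genai` module gemini_llm.py imports

genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash", system_instruction="You are terse."
)

# History entries use the same {"role", "parts"} dicts the extension passes in.
history = [{"role": "user", "parts": "hi"}, {"role": "model", "parts": "hello"}]
chat = model.start_chat(history=history)

# stream=True yields partial chunks; the extension splits them into sentences
# before forwarding downstream.
for chunk in chat.send_message("tell me a joke", stream=True):
    print(chunk.text, end="", flush=True)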
diff --git a/agents/ten_packages/extension/gemini_llm_python/gemini_llm_extension.py b/agents/ten_packages/extension/gemini_llm_python/gemini_llm_extension.py
index 9cbf248e..99b0851a 100644
--- a/agents/ten_packages/extension/gemini_llm_python/gemini_llm_extension.py
+++ b/agents/ten_packages/extension/gemini_llm_python/gemini_llm_extension.py
@@ -14,7 +14,6 @@
     StatusCode,
     CmdResult,
 )
-from .log import logger
 from .utils import get_micro_ts, parse_sentence
 
 
@@ -43,7 +42,7 @@ class GeminiLLMExtension(Extension):
     gemini_llm = None
 
     def on_start(self, ten: TenEnv) -> None:
-        logger.info("GeminiLLMExtension on_start")
+        ten.log_info("GeminiLLMExtension on_start")
         # lazy import packages which requires long time to load
         from .gemini_llm import GeminiLLM, GeminiLLMConfig
 
@@ -55,7 +54,7 @@
             api_key = ten.get_property_string(PROPERTY_API_KEY)
             gemini_llm_config.api_key = api_key
         except Exception as err:
-            logger.info(f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}")
+            ten.log_info(f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}")
             return
 
         for key in [PROPERTY_GREETING, PROPERTY_MODEL, PROPERTY_PROMPT]:
@@ -64,32 +63,32 @@
                 if val:
                     setattr(gemini_llm_config, key, val)
             except Exception as e:
-                logger.warning(f"get_property_string optional {key} failed, err: {e}")
+                ten.log_warn(f"get_property_string optional {key} failed, err: {e}")
 
         for key in [PROPERTY_TEMPERATURE, PROPERTY_TOP_P]:
             try:
                 setattr(gemini_llm_config, key, float(ten.get_property_float(key)))
             except Exception as e:
-                logger.warning(f"get_property_float optional {key} failed, err: {e}")
+                ten.log_warn(f"get_property_float optional {key} failed, err: {e}")
 
         for key in [PROPERTY_MAX_OUTPUT_TOKENS, PROPERTY_TOP_K]:
             try:
                 setattr(gemini_llm_config, key, int(ten.get_property_int(key)))
             except Exception as e:
-                logger.warning(f"get_property_int optional {key} failed, err: {e}")
+                ten.log_warn(f"get_property_int optional {key} failed, err: {e}")
 
         try:
             prop_max_memory_length = ten.get_property_int(PROPERTY_MAX_MEMORY_LENGTH)
             if prop_max_memory_length > 0:
                 self.max_memory_length = int(prop_max_memory_length)
         except Exception as err:
-            logger.warning(
+            ten.log_warn(
                 f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}"
             )
 
         # Create GeminiLLM instance
         self.gemini_llm = GeminiLLM(gemini_llm_config)
-        logger.info(
+        ten.log_info(
             f"newGeminiLLM succeed with max_output_tokens: {gemini_llm_config.max_output_tokens}, model: {gemini_llm_config.model}"
         )
 
@@ -105,20 +104,20 @@
                 DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True
             )
             ten.send_data(output_data)
-            logger.info(f"greeting [{greeting}] sent")
+            ten.log_info(f"greeting [{greeting}] sent")
         except Exception as e:
-            logger.error(f"greeting [{greeting}] send failed, err: {e}")
+            ten.log_error(f"greeting [{greeting}] send failed, err: {e}")
 
         ten.on_start_done()
 
     def on_stop(self, ten: TenEnv) -> None:
-        logger.info("GeminiLLMExtension on_stop")
+        ten.log_info("GeminiLLMExtension on_stop")
         ten.on_stop_done()
 
     def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None:
-        logger.info("GeminiLLMExtension on_cmd")
-        cmd_json = cmd.to_json()
-        logger.info(f"GeminiLLMExtension on_cmd json: {cmd_json}")
+        ten.log_info("GeminiLLMExtension on_cmd")
+        cmd_name = cmd.get_name()
+        ten.log_info(f"GeminiLLMExtension on_cmd name: {cmd_name}")
 
         cmd_name = cmd.get_name()
 
@@ -126,9 +125,9 @@
             self.outdate_ts = get_micro_ts()
             cmd_out = Cmd.create(CMD_OUT_FLUSH)
             ten.send_cmd(cmd_out, None)
-            logger.info(f"GeminiLLMExtension on_cmd sent flush")
+            ten.log_info("GeminiLLMExtension on_cmd sent flush")
         else:
-            logger.info(f"GeminiLLMExtension on_cmd unknown cmd: {cmd_name}")
+            ten.log_info(f"GeminiLLMExtension on_cmd unknown cmd: {cmd_name}")
             cmd_result = CmdResult.create(StatusCode.ERROR)
             cmd_result.set_property_string("detail", "unknown cmd")
             ten.return_result(cmd_result, cmd)
@@ -146,16 +145,16 @@
         example:
         {name: text_data, properties: {text: "hello"}
         """
-        logger.info(f"GeminiLLMExtension on_data")
+        ten.log_info("GeminiLLMExtension on_data")
 
         # Assume 'data' is an object from which we can get properties
         try:
             is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL)
             if not is_final:
-                logger.info("ignore non-final input")
+                ten.log_info("ignore non-final input")
                 return
         except Exception as e:
-            logger.error(
+            ten.log_error(
                 f"on_data get_property_bool {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {e}"
             )
             return
@@ -164,11 +163,11 @@
         try:
             input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT)
             if not input_text:
-                logger.info("ignore empty text")
+                ten.log_info("ignore empty text")
                 return
-            logger.info(f"on_data input text: [{input_text}]")
+            ten.log_info(f"on_data input text: [{input_text}]")
         except Exception as e:
-            logger.error(
+            ten.log_error(
                 f"on_data get_property_string {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {e}"
             )
             return
@@ -180,14 +179,14 @@
 
         def chat_completions_stream_worker(start_time, input_text, memory):
             try:
-                logger.info(
+                ten.log_info(
                     f"chat_completions_stream_worker for input text: [{input_text}] memory: {memory}"
                 )
 
                 # Get result from AI
                 resp = self.gemini_llm.get_chat_completions_stream(memory)
                 if resp is None:
-                    logger.info(
+                    ten.log_info(
                         f"chat_completions_stream_worker for input text: [{input_text}] failed"
                     )
                     return
@@ -198,7 +197,7 @@
 
                 for chat_completions in resp:
                     if start_time < self.outdate_ts:
-                        logger.info(
+                        ten.log_info(
                             f"chat_completions_stream_worker recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}"
                         )
                         break
@@ -216,10 +215,10 @@
                         )
 
                         if len(sentence) == 0 or not sentence_is_final:
-                            logger.info(f"sentence {sentence} is empty or not final")
+                            ten.log_info(f"sentence {sentence} is empty or not final")
                             break
 
-                        logger.info(
+                        ten.log_info(
                             f"chat_completions_stream_worker recv for input text: [{input_text}] got sentence: [{sentence}]"
                         )
 
@@ -233,11 +232,11 @@
                                 DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False
                             )
                             ten.send_data(output_data)
-                            logger.info(
+                            ten.log_info(
                                 f"chat_completions_stream_worker recv for input text: [{input_text}] sent sentence [{sentence}]"
                             )
                         except Exception as e:
-                            logger.error(
+                            ten.log_error(
                                 f"chat_completions_stream_worker recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {e}"
                             )
                             break
@@ -245,7 +244,7 @@
                         sentence = ""
                         if not first_sentence_sent:
                             first_sentence_sent = True
-                            logger.info(
+                            ten.log_info(
                                 f"chat_completions_stream_worker recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_micro_ts() - start_time}ms"
                             )
 
@@ -262,16 +261,16 @@
                         DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True
                     )
                     ten.send_data(output_data)
-                    logger.info(
+                    ten.log_info(
                         f"chat_completions_stream_worker for input text: [{input_text}] end of segment with sentence [{sentence}] sent"
                     )
                 except Exception as e:
-                    logger.error(
+                    ten.log_error(
                         f"chat_completions_stream_worker for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {e}"
                     )
 
             except Exception as e:
-                logger.error(
+                ten.log_error(
                     f"chat_completions_stream_worker for input text: [{input_text}] failed, err: {e}"
                 )
 
@@ -282,4 +281,4 @@
             args=(start_time, input_text, self.memory),
         )
         thread.start()
-        logger.info(f"GeminiLLMExtension on_data end")
+        ten.log_info("GeminiLLMExtension on_data end")
diff --git a/agents/ten_packages/extension/gemini_llm_python/log.py b/agents/ten_packages/extension/gemini_llm_python/log.py
deleted file mode 100644
index fad21710..00000000
--- a/agents/ten_packages/extension/gemini_llm_python/log.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import logging
-from .extension import EXTENSION_NAME
-
-logger = logging.getLogger(EXTENSION_NAME)
-logger.setLevel(logging.INFO)
-
-formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s")
-
-console_handler = logging.StreamHandler()
-console_handler.setFormatter(formatter)
-
-logger.addHandler(console_handler)
diff --git a/agents/ten_packages/extension/gemini_llm_python/manifest.json b/agents/ten_packages/extension/gemini_llm_python/manifest.json
index 2d9f51b8..67fa9bdf 100644
--- a/agents/ten_packages/extension/gemini_llm_python/manifest.json
+++ b/agents/ten_packages/extension/gemini_llm_python/manifest.json
@@ -1,12 +1,12 @@
 {
   "type": "extension",
   "name": "gemini_llm_python",
-  "version": "0.4.0",
+  "version": "0.1.0",
   "dependencies": [
     {
       "type": "system",
       "name": "ten_runtime_python",
-      "version": "0.4"
+      "version": "0.6"
     }
   ],
   "api": {
diff --git a/agents/ten_packages/extension/gemini_v2v_python/BUILD.gn b/agents/ten_packages/extension/gemini_v2v_python/BUILD.gn
index 066a7ee4..f44affe0 100644
--- a/agents/ten_packages/extension/gemini_v2v_python/BUILD.gn
+++ b/agents/ten_packages/extension/gemini_v2v_python/BUILD.gn
@@ -14,7 +14,6 @@ ten_package("gemini_v2v_python") {
     "__init__.py",
     "addon.py",
     "extension.py",
-    "log.py",
     "manifest.json",
     "property.json",
   ]
 }
FunctionResponse +from ten_ai_base import AsyncLLMBaseExtension +from dataclasses import dataclass +from ten_ai_base.config import BaseConfig +from ten_ai_base.chat_memory import ChatMemory +from ten_ai_base.usage import ( + LLMUsage, + LLMCompletionTokensDetails, + LLMPromptTokensDetails, +) +from ten_ai_base.types import ( + LLMToolMetadata, + LLMToolResult, + LLMChatCompletionContentPartParam, + TTSPcmOptions, +) +from google.genai.types import ( + LiveServerMessage, + LiveConnectConfig, + LiveConnectConfigDict, + GenerationConfig, + Content, + Part, + Tool, + FunctionDeclaration, + Schema, + LiveClientToolResponse, + FunctionCall, + FunctionResponse, + SpeechConfig, + VoiceConfig, + PrebuiltVoiceConfig, +) from google.genai.live import AsyncSession from PIL import Image from io import BytesIO @@ -41,13 +67,14 @@ import urllib.parse import google.genai._api_client -google.genai._api_client.urllib = urllib +google.genai._api_client.urllib = urllib # pylint: disable=protected-access CMD_IN_FLUSH = "flush" CMD_IN_ON_USER_JOINED = "on_user_joined" CMD_IN_ON_USER_LEFT = "on_user_left" CMD_OUT_FLUSH = "flush" + class Role(str, Enum): User = "user" Assistant = "assistant" @@ -76,6 +103,7 @@ def rgb2base64jpeg(rgb_data, width, height): # mime_type = "image/jpeg" return base64_encoded_image + def resize_image_keep_aspect(image, max_size=512): """ Resize an image while maintaining its aspect ratio, ensuring the larger dimension is max_size. @@ -108,6 +136,7 @@ def resize_image_keep_aspect(image, max_size=512): return resized_image + @dataclass class GeminiRealtimeConfig(BaseConfig): base_uri: str = "generativelanguage.googleapis.com" @@ -133,13 +162,14 @@ def build_ctx(self) -> dict: "model": self.model, } + class GeminiRealtimeExtension(AsyncLLMBaseExtension): def __init__(self, name): super().__init__(name) self.config: GeminiRealtimeConfig = None self.stopped: bool = False self.connected: bool = False - self.buffer: bytearray = b'' + self.buffer: bytearray = b"" self.memory: ChatMemory = None self.total_usage: LLMUsage = LLMUsage() self.users_count = 0 @@ -153,16 +183,18 @@ def __init__(self, name): self.connect_times = [] self.first_token_times = [] - self.buff: bytearray = b'' + self.buff: bytearray = b"" self.transcript: str = "" self.ctx: dict = {} self.input_end = time.time() self.client = None - self.session:AsyncSession = None - self.leftover_bytes = b'' + self.session: AsyncSession = None + self.leftover_bytes = b"" self.video_task = None self.image_queue = asyncio.Queue() self.video_buff: str = "" + self.loop = None + self.ten_env = None async def on_init(self, ten_env: AsyncTenEnv) -> None: await super().on_init(ten_env) @@ -170,11 +202,12 @@ async def on_init(self, ten_env: AsyncTenEnv) -> None: async def on_start(self, ten_env: AsyncTenEnv) -> None: await super().on_start(ten_env) + self.ten_env = ten_env ten_env.log_debug("on_start") self.loop = asyncio.get_event_loop() - self.config = GeminiRealtimeConfig.create(ten_env=ten_env) + self.config = await GeminiRealtimeConfig.create_async(ten_env=ten_env) ten_env.log_info(f"config: {self.config}") if not self.config.api_key: @@ -184,15 +217,13 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: try: self.ctx = self.config.build_ctx() self.ctx["greeting"] = self.config.greeting - self.client = genai.Client( api_key=self.config.api_key, http_options={ - 'api_version': self.config.api_version, - 'url': self.config.base_uri, - } - + "api_version": self.config.api_version, + "url": self.config.base_uri, + }, ) 
self.loop.create_task(self._loop(ten_env)) self.loop.create_task(self._on_video(ten_env)) @@ -202,22 +233,22 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: traceback.print_exc() self.ten_env.log_error(f"Failed to init client {e}") - self.ten_env = ten_env - async def _loop(self, ten_env: AsyncTenEnv) -> None: while not self.stopped: await asyncio.sleep(1) try: - config:LiveConnectConfig = self._get_session_config() - ten_env.log_info(f"Start listen") - async with self.client.aio.live.connect(model=self.config.model, config=config) as session: - ten_env.log_info(f"Connected") + config: LiveConnectConfig = self._get_session_config() + ten_env.log_info("Start listen") + async with self.client.aio.live.connect( + model=self.config.model, config=config + ) as session: + ten_env.log_info("Connected") session = cast(AsyncSession, session) self.session = session self.connected = True await self._greeting() - + while True: try: async for response in session.receive(): @@ -226,32 +257,49 @@ async def _loop(self, ten_env: AsyncTenEnv) -> None: try: if response.server_content: if response.server_content.interrupted: - ten_env.log_info(f"Interrupted") + ten_env.log_info("Interrupted") await self._flush() continue - elif not response.server_content.turn_complete and response.server_content.model_turn: - for part in response.server_content.model_turn.parts: - self.send_audio_out(ten_env, part.inline_data.data, sample_rate=24000, bytes_per_sample=2, number_of_channels=1) + elif ( + not response.server_content.turn_complete + and response.server_content.model_turn + ): + for ( + part + ) in ( + response.server_content.model_turn.parts + ): + await self.send_audio_out( + ten_env, + part.inline_data.data, + sample_rate=24000, + bytes_per_sample=2, + number_of_channels=1, + ) elif response.server_content.turn_complete: - ten_env.log_info(f"Turn complete") + ten_env.log_info("Turn complete") elif response.setup_complete: - ten_env.log_info(f"Setup complete") + ten_env.log_info("Setup complete") elif response.tool_call: func_calls = response.tool_call.function_calls - self.loop.create_task(self._handle_tool_call(func_calls)) - except Exception as e: + self.loop.create_task( + self._handle_tool_call(func_calls) + ) + except Exception: traceback.print_exc() - ten_env.log_error(f"Failed to handle response") - + ten_env.log_error("Failed to handle response") + await self._flush() - ten_env.log_info(f"Finish listen") + ten_env.log_info("Finish listen") except websockets.exceptions.ConnectionClosedOK: ten_env.log_info("Connection closed") break except Exception as e: self.ten_env.log_error(f"Failed to handle loop {e}") - def send_audio_out(self, ten_env: AsyncTenEnv, audio_data: bytes, **args: TTSPcmOptions) -> None: + async def send_audio_out( + self, ten_env: AsyncTenEnv, audio_data: bytes, **args: TTSPcmOptions + ) -> None: """End sending audio out.""" sample_rate = args.get("sample_rate", 24000) bytes_per_sample = args.get("bytes_per_sample", 2) @@ -263,11 +311,13 @@ def send_audio_out(self, ten_env: AsyncTenEnv, audio_data: bytes, **args: TTSPcm # Check if combined_data length is odd if len(combined_data) % (bytes_per_sample * number_of_channels) != 0: # Save the last incomplete frame - valid_length = len(combined_data) - (len(combined_data) % (bytes_per_sample * number_of_channels)) + valid_length = len(combined_data) - ( + len(combined_data) % (bytes_per_sample * number_of_channels) + ) self.leftover_bytes = combined_data[valid_length:] combined_data = combined_data[:valid_length] else: - 
self.leftover_bytes = b'' + self.leftover_bytes = b"" if combined_data: f = AudioFrame.create("pcm_frame") @@ -275,13 +325,15 @@ def send_audio_out(self, ten_env: AsyncTenEnv, audio_data: bytes, **args: TTSPcm f.set_bytes_per_sample(bytes_per_sample) f.set_number_of_channels(number_of_channels) f.set_data_fmt(AudioFrameDataFmt.INTERLEAVE) - f.set_samples_per_channel(len(combined_data) // (bytes_per_sample * number_of_channels)) + f.set_samples_per_channel( + len(combined_data) // (bytes_per_sample * number_of_channels) + ) f.alloc_buf(len(combined_data)) buff = f.lock_buf() buff[:] = combined_data f.unlock_buf(buff) - ten_env.send_audio_frame(f) - except Exception as e: + await ten_env.send_audio_frame(f) + except Exception: pass # ten_env.log_error(f"error send audio frame, {traceback.format_exc()}") @@ -293,7 +345,9 @@ async def on_stop(self, ten_env: AsyncTenEnv) -> None: if self.session: await self.session.close() - async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None: + async def on_audio_frame( + self, ten_env: AsyncTenEnv, audio_frame: AudioFrame + ) -> None: await super().on_audio_frame(ten_env, audio_frame) try: stream_id = audio_frame.get_property_int("stream_id") @@ -315,7 +369,7 @@ async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: cmd_name = cmd.get_name() - ten_env.log_debug("on_cmd name {}".format(cmd_name)) + ten_env.log_debug(f"on_cmd name {cmd_name}") status = StatusCode.OK detail = "success" @@ -339,7 +393,7 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: cmd_result = CmdResult.create(status) cmd_result.set_property_string("detail", detail) - ten_env.return_result(cmd_result, cmd) + await ten_env.return_result(cmd_result, cmd) # Not support for now async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: @@ -351,25 +405,26 @@ async def on_video_frame(self, async_ten_env, video_frame): image_width = video_frame.get_width() image_height = video_frame.get_height() await self.image_queue.put([image_data, image_width, image_height]) - - async def _on_video(self, ten_env:AsyncTenEnv): + async def _on_video(self, _: AsyncTenEnv): while True: - + # Process the first frame from the queue [image_data, image_width, image_height] = await self.image_queue.get() self.video_buff = rgb2base64jpeg(image_data, image_width, image_height) - media_chunks = [{ - "data": self.video_buff, - "mime_type": "image/jpeg", - }] + media_chunks = [ + { + "data": self.video_buff, + "mime_type": "image/jpeg", + } + ] try: if self.connected: # ten_env.log_info(f"send image") await self.session.send(media_chunks) except Exception as e: self.ten_env.log_error(f"Failed to send image {e}") - + # Skip remaining frames for the second while not self.image_queue.empty(): await self.image_queue.get() @@ -384,13 +439,15 @@ async def _on_audio(self, buff: bytearray): if self.connected and len(self.buff) >= self.audio_len_threshold: # await self.conn.send_audio_data(self.buff) try: - media_chunks = [{ - "data": base64.b64encode(self.buff).decode(), - "mime_type": "audio/pcm", - }] + media_chunks = [ + { + "data": base64.b64encode(self.buff).decode(), + "mime_type": "audio/pcm", + } + ] # await self.session.send(LiveClientRealtimeInput(media_chunks=media_chunks)) await self.session.send(media_chunks) - self.buff = b'' + self.buff = b"" except Exception as e: # pass self.ten_env.log_error(f"Failed to send audio {e}") @@ -398,88 +455,71 @@ async def _on_audio(self, 
buff: bytearray): def _get_session_config(self) -> LiveConnectConfigDict: def tool_dict(tool: LLMToolMetadata): required = [] - properties:dict[str, "Schema"] = {} + properties: dict[str, "Schema"] = {} for param in tool.parameters: properties[param.name] = Schema( - type=param.type.upper(), - description=param.description + type=param.type.upper(), description=param.description ) if param.required: required.append(param.name) - t = Tool( - function_declarations=[FunctionDeclaration( - name=tool.name, - description=tool.description, - parameters=Schema( - type="OBJECT", - properties=properties, - required=required + function_declarations=[ + FunctionDeclaration( + name=tool.name, + description=tool.description, + parameters=Schema( + type="OBJECT", properties=properties, required=required + ), ) - )]) + ] + ) return t - tools = [tool_dict(t) for t in self.available_tools] if len(self.available_tools) > 0 else [] + tools = ( + [tool_dict(t) for t in self.available_tools] + if len(self.available_tools) > 0 + else [] + ) - tools.append(Tool( - google_search={} - )) - tools.append(Tool( - code_execution={} - )) + tools.append(Tool(google_search={})) + tools.append(Tool(code_execution={})) config = LiveConnectConfig( response_modalities=["AUDIO"], system_instruction=Content(parts=[Part(text=self.config.prompt)]), tools=tools, # voice is currently not working - # speech_config=SpeechConfig( - # voice_config=VoiceConfig( - # prebuilt_voice_config=PrebuiltVoiceConfig( - # voice_name=self.config.voice - # ) - # ) - # ), + speech_config=SpeechConfig( + voice_config=VoiceConfig( + prebuilt_voice_config=PrebuiltVoiceConfig( + voice_name=self.config.voice + ) + ) + ), generation_config=GenerationConfig( temperature=self.config.temperature, - max_output_tokens=self.config.max_tokens - ) + max_output_tokens=self.config.max_tokens, + ), ) return config - - async def on_tools_update(self, ten_env: AsyncTenEnv, tool: LLMToolMetadata) -> None: + + async def on_tools_update( + self, ten_env: AsyncTenEnv, tool: LLMToolMetadata + ) -> None: """Called when a new tool is registered. 
Implement this method to process the new tool."""
-        self.ten_env.log_info(f"on tools update {tool}")
+        ten_env.log_info(f"on tools update {tool}")
         # await self._update_session()
-
+
     def _replace(self, prompt: str) -> str:
         result = prompt
         for token, value in self.ctx.items():
-            result = result.replace("{"+token+"}", value)
+            result = result.replace("{" + token + "}", value)
         return result

-    # Direction: OUT
-    def _on_audio_delta(self, delta: bytes) -> None:
-        audio_data = base64.b64decode(delta)
-        self.ten_env.log_debug(f"on_audio_delta audio_data len {len(audio_data)} samples {len(audio_data) // 2}")
-        self._dump_audio_if_need(audio_data, Role.Assistant)
-
-        f = AudioFrame.create("pcm_frame")
-        f.set_sample_rate(self.config.sample_rate)
-        f.set_bytes_per_sample(2)
-        f.set_number_of_channels(1)
-        f.set_data_fmt(AudioFrameDataFmt.INTERLEAVE)
-        f.set_samples_per_channel(len(audio_data) // 2)
-        f.alloc_buf(len(audio_data))
-        buff = f.lock_buf()
-        buff[:] = audio_data
-        f.unlock_buf(buff)
-        self.ten_env.send_audio_frame(f)
-
     def _send_transcript(self, content: str, role: Role, is_final: bool) -> None:
         def is_punctuation(char):
             if char in [",", "，", ".", "。", "?", "？", "!", "！"]:
@@ -501,7 +541,13 @@ def parse_sentences(sentence_fragment, content):
                 remain = current_sentence  # Any remaining characters form the incomplete sentence
             return sentences, remain

-        def send_data(ten_env: AsyncTenEnv, sentence: str, stream_id: int, role: str, is_final: bool):
+        async def send_data(
+            ten_env: AsyncTenEnv,
+            sentence: str,
+            stream_id: int,
+            role: str,
+            is_final: bool,
+        ):
             try:
                 d = Data.create("text_data")
                 d.set_property_string("text", sentence)
@@ -509,21 +555,30 @@ def send_data(ten_env: AsyncTenEnv, sentence: str, stream_id: int, role: str, is
                 d.set_property_string("role", role)
                 d.set_property_int("stream_id", stream_id)
                 ten_env.log_info(
-                    f"send transcript text [{sentence}] stream_id {stream_id} is_final {is_final} end_of_segment {is_final} role {role}")
-                ten_env.send_data(d)
+                    f"send transcript text [{sentence}] stream_id {stream_id} is_final {is_final} end_of_segment {is_final} role {role}"
+                )
+                await ten_env.send_data(d)
             except Exception as e:
-                ten_env.log_error(f"Error send text data {role}: {sentence} {is_final} {e}")
+                ten_env.log_error(
+                    f"Error send text data {role}: {sentence} {is_final} {e}"
+                )

         stream_id = self.remote_stream_id if role == Role.User else 0
         try:
             if role == Role.Assistant and not is_final:
                 sentences, self.transcript = parse_sentences(self.transcript, content)
                 for s in sentences:
-                    send_data(self.ten_env, s, stream_id, role, is_final)
+                    asyncio.create_task(
+                        send_data(self.ten_env, s, stream_id, role, is_final)
+                    )
             else:
-                send_data(self.ten_env, content, stream_id, role, is_final)
+                asyncio.create_task(
+                    send_data(self.ten_env, content, stream_id, role, is_final)
+                )
         except Exception as e:
-            self.ten_env.log_error(f"Error send text data {role}: {content} {is_final} {e}")
+            self.ten_env.log_error(
+                f"Error send text data {role}: {content} {is_final} {e}"
+            )

     def _dump_audio_if_need(self, buf: bytearray, role: Role) -> None:
         if not self.config.dump:
@@ -532,48 +587,47 @@ def _dump_audio_if_need(self, buf: bytearray, role: Role) -> None:
         with open("{}_{}.pcm".format(role, self.channel_name), "ab") as dump_file:
             dump_file.write(buf)

-    async def _handle_tool_call(self, func_calls:list[FunctionCall] ) -> None:
+    async def _handle_tool_call(self, func_calls: list[FunctionCall]) -> None:
         function_responses = []
         for call in func_calls:
             tool_call_id = call.id
             name = 
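The transcript path above buffers an incomplete fragment and only emits whole sentences. A self-contained sketch of that split, assuming the same punctuation set (names are illustrative):

```python
PUNCS = {",", "，", ".", "。", "?", "？", "!", "！"}

def parse_sentences(fragment: str, content: str) -> tuple[list[str], str]:
    """Append new content to the pending fragment and split out full sentences."""
    sentences, current = [], fragment
    for ch in content:
        current += ch
        if ch in PUNCS and current.strip():
            sentences.append(current)
            current = ""
    return sentences, current  # the tail stays buffered until more text arrives

assert parse_sentences("Hel", "lo. Wor") == (["Hello."], " Wor")
```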
call.name arguments = call.args - self.ten_env.log_info(f"_handle_tool_call {tool_call_id} {name} {arguments}") + self.ten_env.log_info( + f"_handle_tool_call {tool_call_id} {name} {arguments}" + ) cmd: Cmd = Cmd.create(CMD_TOOL_CALL) cmd.set_property_string("name", name) cmd.set_property_from_json("arguments", json.dumps(arguments)) result: CmdResult = await self.ten_env.send_cmd(cmd) func_response = FunctionResponse( - id=tool_call_id, - name=name, - response={"error":"Failed to call tool"} + id=tool_call_id, name=name, response={"error": "Failed to call tool"} ) if result.get_status_code() == StatusCode.OK: tool_result: LLMToolResult = json.loads( - result.get_property_to_json(CMD_PROPERTY_RESULT)) - + result.get_property_to_json(CMD_PROPERTY_RESULT) + ) + result_content = tool_result["content"] func_response = FunctionResponse( - id=tool_call_id, - name=name, - response={ - "output": result_content - } + id=tool_call_id, name=name, response={"output": result_content} ) self.ten_env.log_info(f"tool_result: {tool_call_id} {tool_result}") else: - self.ten_env.log_error(f"Tool call failed") + self.ten_env.log_error("Tool call failed") function_responses.append(func_response) # await self.conn.send_request(tool_response) # await self.conn.send_request(ResponseCreate()) self.ten_env.log_info(f"_remote_tool_call finish {name} {arguments}") try: self.ten_env.log_info(f"send tool response {function_responses}") - await self.session.send(LiveClientToolResponse(function_responses=function_responses)) + await self.session.send( + LiveClientToolResponse(function_responses=function_responses) + ) except Exception as e: self.ten_env.log_error(f"Failed to send tool response {e}") - + def _greeting_text(self) -> str: text = "Hi, there." if self.config.language == "zh-CN": @@ -584,41 +638,33 @@ def _greeting_text(self) -> str: text = "안녕하세요" return text - def _convert_tool_params_to_dict(self, tool: LLMToolMetadata): - json = { - "type": "object", - "properties": {}, - "required": [] - } + json_dict = {"type": "object", "properties": {}, "required": []} for param in tool.parameters: - json["properties"][param.name] = { + json_dict["properties"][param.name] = { "type": param.type, - "description": param.description + "description": param.description, } if param.required: - json["required"].append(param.name) + json_dict["required"].append(param.name) - return json - - - def _convert_to_content_parts(self, content: Iterable[LLMChatCompletionContentPartParam]): - content_parts = [] + return json_dict + def _convert_to_content_parts( + self, content: Iterable[LLMChatCompletionContentPartParam] + ): + content_parts = [] if isinstance(content, str): - content_parts.append({ - "type": "text", - "text": content - }) + content_parts.append({"type": "text", "text": content}) else: for part in content: # Only text content is supported currently for v2v model if part["type"] == "text": content_parts.append(part) return content_parts - + async def _greeting(self) -> None: if self.connected and self.users_count == 1: text = self._greeting_text() @@ -631,9 +677,9 @@ async def _flush(self) -> None: try: c = Cmd.create("flush") await self.ten_env.send_cmd(c) - except: - self.ten_env.log_error(f"Error flush") - + except Exception: + self.ten_env.log_error("Error flush") + async def _update_usage(self, usage: dict) -> None: self.total_usage.completion_tokens += usage.get("output_tokens") self.total_usage.prompt_tokens += usage.get("input_tokens") @@ -644,26 +690,54 @@ async def _update_usage(self, usage: dict) -> None: 
self.total_usage.prompt_tokens_details = LLMPromptTokensDetails()

         if usage.get("output_token_details"):
-            self.total_usage.completion_tokens_details.accepted_prediction_tokens += usage["output_token_details"].get("text_tokens")
-            self.total_usage.completion_tokens_details.audio_tokens += usage["output_token_details"].get("audio_tokens")
-
+            self.total_usage.completion_tokens_details.accepted_prediction_tokens += (
+                usage["output_token_details"].get("text_tokens")
+            )
+            self.total_usage.completion_tokens_details.audio_tokens += usage[
+                "output_token_details"
+            ].get("audio_tokens")
+
         if usage.get("input_token_details"):
-            self.total_usage.prompt_tokens_details.audio_tokens += usage["input_token_details"].get("audio_tokens")
-            self.total_usage.prompt_tokens_details.cached_tokens += usage["input_token_details"].get("cached_tokens")
-            self.total_usage.prompt_tokens_details.text_tokens += usage["input_token_details"].get("text_tokens")
+            self.total_usage.prompt_tokens_details.audio_tokens += usage[
+                "input_token_details"
+            ].get("audio_tokens")
+            self.total_usage.prompt_tokens_details.cached_tokens += usage[
+                "input_token_details"
+            ].get("cached_tokens")
+            self.total_usage.prompt_tokens_details.text_tokens += usage[
+                "input_token_details"
+            ].get("text_tokens")

         self.ten_env.log_info(f"total usage: {self.total_usage}")

         data = Data.create("llm_stat")
         data.set_property_from_json("usage", json.dumps(self.total_usage.model_dump()))
         if self.connect_times and self.completion_times and self.first_token_times:
-            data.set_property_from_json("latency", json.dumps({
-                "connection_latency_95": np.percentile(self.connect_times, 95),
-                "completion_latency_95": np.percentile(self.completion_times, 95),
-                "first_token_latency_95": np.percentile(self.first_token_times, 95),
-                "connection_latency_99": np.percentile(self.connect_times, 99),
-                "completion_latency_99": np.percentile(self.completion_times, 99),
-                "first_token_latency_99": np.percentile(self.first_token_times, 99)
-            }))
-        self.ten_env.send_data(data)
+            data.set_property_from_json(
+                "latency",
+                json.dumps(
+                    {
+                        "connection_latency_95": np.percentile(self.connect_times, 95),
+                        "completion_latency_95": np.percentile(
+                            self.completion_times, 95
+                        ),
+                        "first_token_latency_95": np.percentile(
+                            self.first_token_times, 95
+                        ),
+                        "connection_latency_99": np.percentile(self.connect_times, 99),
+                        "completion_latency_99": np.percentile(
+                            self.completion_times, 99
+                        ),
+                        "first_token_latency_99": np.percentile(
+                            self.first_token_times, 99
+                        ),
+                    }
+                ),
+            )
+        asyncio.create_task(self.ten_env.send_data(data))
+
+    async def on_call_chat_completion(self, async_ten_env, **kargs):
+        raise NotImplementedError
+
+    async def on_data_chat_completion(self, async_ten_env, **kargs):
+        raise NotImplementedError
diff --git a/agents/ten_packages/extension/gemini_v2v_python/manifest.json b/agents/ten_packages/extension/gemini_v2v_python/manifest.json
index 4c25224e..27cfdacb 100644
--- a/agents/ten_packages/extension/gemini_v2v_python/manifest.json
+++ b/agents/ten_packages/extension/gemini_v2v_python/manifest.json
@@ -6,7 +6,7 @@
     {
       "type": "system",
       "name": "ten_runtime_python",
-      "version": "0.4"
+      "version": "0.6"
     }
   ],
   "package": {
@@ -110,21 +110,30 @@
     {
       "name": "tool_register",
       "property": {
-        "name": {
-          "type": "string"
-        },
-        "description": {
-          "type": "string"
-        },
-        "parameters": {
-          "type": "string"
+        "tool": {
+          "type": "object",
+          "properties": {
+            "name": {
+              "type": "string"
+            },
+            "description": {
+              "type": "string"
+            },
+            "parameters": {
+              "type": 
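The llm_stat payload above reports p95/p99 over the collected timing lists via np.percentile. The same aggregation in isolation (key names shortened for illustration):

```python
import numpy as np

def latency_stats(samples: list[float]) -> dict[str, float]:
    """p95/p99 summary in the shape of the llm_stat latency payload above."""
    return {
        "latency_95": float(np.percentile(samples, 95)),
        "latency_99": float(np.percentile(samples, 99)),
    }

print(latency_stats([0.12, 0.15, 0.11, 0.30, 0.14]))
```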
"array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "required": [ + "name", + "description", + "parameters" + ] } }, - "required": [ - "name", - "description", - "parameters" - ], "result": { "property": { "response": { diff --git a/agents/ten_packages/extension/gemini_v2v_python/requirements.txt b/agents/ten_packages/extension/gemini_v2v_python/requirements.txt index dd1f0a7c..a6d6d5a8 100644 --- a/agents/ten_packages/extension/gemini_v2v_python/requirements.txt +++ b/agents/ten_packages/extension/gemini_v2v_python/requirements.txt @@ -1,2 +1,2 @@ asyncio -google-genai==0.2.2 \ No newline at end of file +google-genai==0.3.0 \ No newline at end of file diff --git a/agents/ten_packages/extension/glue_python_async/extension.py b/agents/ten_packages/extension/glue_python_async/extension.py index 56a13b60..cae63e44 100644 --- a/agents/ten_packages/extension/glue_python_async/extension.py +++ b/agents/ten_packages/extension/glue_python_async/extension.py @@ -25,9 +25,26 @@ Data, ) -from ten_ai_base import BaseConfig, ChatMemory, LLMUsage, LLMCompletionTokensDetails, LLMPromptTokensDetails, EVENT_MEMORY_APPENDED -from ten_ai_base.llm import AsyncLLMBaseExtension, LLMCallCompletionArgs, LLMDataCompletionArgs, LLMToolMetadata -from ten_ai_base.types import LLMChatCompletionUserMessageParam, LLMToolResult +from ten_ai_base.config import BaseConfig +from ten_ai_base.chat_memory import ( + ChatMemory, + EVENT_MEMORY_APPENDED, +) +from ten_ai_base.usage import ( + LLMUsage, + LLMCompletionTokensDetails, + LLMPromptTokensDetails, +) +from ten_ai_base import ( + AsyncLLMBaseExtension, +) +from ten_ai_base.types import ( + LLMChatCompletionUserMessageParam, + LLMToolResult, + LLMCallCompletionArgs, + LLMDataCompletionArgs, + LLMToolMetadata, +) CMD_IN_FLUSH = "flush" CMD_IN_ON_USER_JOINED = "on_user_joined" @@ -43,11 +60,13 @@ CMD_PROPERTY_RESULT = "tool_result" + def is_punctuation(char): if char in [",", ",", ".", "。", "?", "?", "!", "!"]: return True return False + def parse_sentences(sentence_fragment, content): sentences = [] current_sentence = sentence_fragment @@ -62,34 +81,41 @@ def parse_sentences(sentence_fragment, content): remain = current_sentence return sentences, remain + class ToolCallFunction(BaseModel): name: str | None = None arguments: str | None = None + class ToolCall(BaseModel): index: int type: str = "function" id: str | None = None function: ToolCallFunction + class ToolCallResponse(BaseModel): id: str response: LLMToolResult error: str | None = None + class Delta(BaseModel): content: str | None = None tool_calls: List[ToolCall] = None + class Choice(BaseModel): delta: Delta = None index: int finish_reason: str | None + class ResponseChunk(BaseModel): choices: List[Choice] usage: LLMUsage | None = None + @dataclass class GlueConfig(BaseConfig): api_url: str = "http://localhost:8000/chat/completions" @@ -105,20 +131,24 @@ class GlueConfig(BaseConfig): extra_context: dict = field(default_factory=dict) enable_storage: bool = False + class AsyncGlueExtension(AsyncLLMBaseExtension): - config : GlueConfig = None - ten_env: AsyncTenEnv = None - loop: asyncio.AbstractEventLoop = None - stopped: bool = False - memory: ChatMemory = None - total_usage: LLMUsage = LLMUsage() - users_count = 0 + def __init__(self, name): + super().__init__(name) + + self.config: GlueConfig = None + self.ten_env: AsyncTenEnv = None + self.loop: asyncio.AbstractEventLoop = None + self.stopped: bool = False + self.memory: ChatMemory = None + self.total_usage: LLMUsage = LLMUsage() + 
self.users_count = 0 - completion_times = [] - connect_times = [] - first_token_times = [] + self.completion_times = [] + self.connect_times = [] + self.first_token_times = [] - remote_stream_id: int = 999 # TODO + self.remote_stream_id: int = 999 async def on_init(self, ten_env: AsyncTenEnv) -> None: await super().on_init(ten_env) @@ -130,7 +160,7 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: self.loop = asyncio.get_event_loop() - self.config = GlueConfig.create(ten_env=ten_env) + self.config = await GlueConfig.create_async(ten_env=ten_env) ten_env.log_info(f"config: {self.config}") self.memory = ChatMemory(self.config.max_history) @@ -143,7 +173,7 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: for i in history: self.memory.put(i) ten_env.log_info(f"on retrieve context {history}") - except Exception as e: + except Exception: ten_env.log_error("Failed to handle retrieve result {e}") else: ten_env.log_warn("Failed to retrieve content") @@ -187,18 +217,22 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: cmd_result = CmdResult.create(status) cmd_result.set_property_string("detail", detail) - ten_env.return_result(cmd_result, cmd) + await ten_env.return_result(cmd_result, cmd) - async def on_call_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMCallCompletionArgs) -> any: - raise Exception("Not implemented") + async def on_call_chat_completion( + self, ten_env: AsyncTenEnv, **kargs: LLMCallCompletionArgs + ) -> any: + raise RuntimeError("Not implemented") - async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs) -> None: - input: LLMChatCompletionUserMessageParam = kargs.get("messages", []) + async def on_data_chat_completion( + self, ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs + ) -> None: + input_messages: LLMChatCompletionUserMessageParam = kargs.get("messages", []) messages = [] if self.config.prompt: messages.append({"role": "system", "content": self.config.prompt}) - + history = self.memory.get() while history: if history[0].get("role") == "tool": @@ -207,21 +241,21 @@ async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCo if history[0].get("role") == "assistant" and history[0].get("tool_calls"): history = history[1:] continue - + # Skip the first tool role break - + messages.extend(history) - if not input: + if not input_messages: ten_env.log_warn("No message in data") else: - messages.extend(input) - for i in input: + messages.extend(input_messages) + for i in input_messages: self.memory.put(i) def tool_dict(tool: LLMToolMetadata): - json = { + json_dict = { "type": "function", "function": { "name": tool.name, @@ -230,29 +264,29 @@ def tool_dict(tool: LLMToolMetadata): "type": "object", "properties": {}, "required": [], - "additionalProperties": False + "additionalProperties": False, }, }, - "strict": True + "strict": True, } for param in tool.parameters: - json["function"]["parameters"]["properties"][param.name] = { + json_dict["function"]["parameters"]["properties"][param.name] = { "type": param.type, - "description": param.description + "description": param.description, } if param.required: - json["function"]["parameters"]["required"].append(param.name) + json_dict["function"]["parameters"]["required"].append(param.name) + + return json_dict - return json + def trim_xml(input_string): + return re.sub(r"<[^>]+>", "", input_string).strip() - def trim_xml(input_string): - return re.sub(r'<[^>]+>', '', input_string).strip() - tools = [] for tool in 
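tool_dict above flattens an LLMToolMetadata into an OpenAI-style function schema. A standalone sketch of the same transformation; ToolParam is a hypothetical stand-in for the metadata's parameter entries, not part of ten_ai_base:

```python
from dataclasses import dataclass

@dataclass
class ToolParam:  # stand-in for LLMToolMetadata parameter entries
    name: str
    type: str
    description: str
    required: bool = False

def tool_dict(name: str, description: str, params: list[ToolParam]) -> dict:
    """Shape a tool description like the glue extension's tool_dict above."""
    schema = {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {},
                "required": [],
                "additionalProperties": False,
            },
        },
        "strict": True,
    }
    for p in params:
        schema["function"]["parameters"]["properties"][p.name] = {
            "type": p.type,
            "description": p.description,
        }
        if p.required:
            schema["function"]["parameters"]["required"].append(p.name)
    return schema
```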
self.available_tools: tools.append(tool_dict(tool)) - + total_output = "" sentence_fragment = "" calls = {} @@ -263,7 +297,6 @@ def trim_xml(input_string): response = self._stream_chat(messages=messages, tools=tools) async for message in response: self.ten_env.log_debug(f"content: {message}") - # TODO: handle tool call try: c = ResponseChunk(**message) if c.choices: @@ -271,23 +304,33 @@ def trim_xml(input_string): if first_token_time is None: first_token_time = time.time() self.first_token_times.append(first_token_time - start_time) - + content = c.choices[0].delta.content if self.config.ssml_enabled and content.startswith(""): content = trim_xml(content) total_output += content - sentences, sentence_fragment = parse_sentences(sentence_fragment, content) + sentences, sentence_fragment = parse_sentences( + sentence_fragment, content + ) for s in sentences: await self._send_text(s) if c.choices[0].delta.tool_calls: - self.ten_env.log_info(f"tool_calls: {c.choices[0].delta.tool_calls}") + self.ten_env.log_info( + f"tool_calls: {c.choices[0].delta.tool_calls}" + ) for call in c.choices[0].delta.tool_calls: if call.index not in calls: - calls[call.index] = ToolCall(id=call.id, index=call.index, function=ToolCallFunction(name="", arguments="")) + calls[call.index] = ToolCall( + id=call.id, + index=call.index, + function=ToolCallFunction(name="", arguments=""), + ) if call.function.name: calls[call.index].function.name += call.function.name if call.function.arguments: - calls[call.index].function.arguments += call.function.arguments + calls[ + call.index + ].function.arguments += call.function.arguments if c.usage: self.ten_env.log_info(f"usage: {c.usage}") await self._update_usage(c.usage) @@ -298,10 +341,10 @@ def trim_xml(input_string): await self._send_text(sentence_fragment) end_time = time.time() self.completion_times.append(end_time - start_time) - + if total_output: self.memory.put({"role": "assistant", "content": total_output}) - + if calls: tasks = [] tool_calls = [] @@ -314,14 +357,22 @@ def trim_xml(input_string): for r in responses: content = r.response["content"] self.ten_env.log_info(f"tool call response: {content} {r.id}") - self.memory.put({"role": "tool", "content": json.dumps(content), "tool_call_id": r.id}) - + self.memory.put( + { + "role": "tool", + "content": json.dumps(content), + "tool_call_id": r.id, + } + ) + # request again to let the model know the tool call results await self.on_data_chat_completion(ten_env) - + self.ten_env.log_info(f"total_output: {total_output} {calls}") - async def on_tools_update(self, ten_env: AsyncTenEnv, tool: LLMToolMetadata) -> None: + async def on_tools_update( + self, ten_env: AsyncTenEnv, tool: LLMToolMetadata + ) -> None: # Implement the logic for tool updates return await super().on_tools_update(ten_env, tool) @@ -334,29 +385,37 @@ async def handle_tool_call(self, call: ToolCall) -> ToolCallResponse: result: CmdResult = await self.ten_env.send_cmd(cmd) if result.get_status_code() == StatusCode.OK: tool_result: LLMToolResult = json.loads( - result.get_property_to_json(CMD_PROPERTY_RESULT)) + result.get_property_to_json(CMD_PROPERTY_RESULT) + ) self.ten_env.log_info(f"tool_result: {call} {tool_result}") return ToolCallResponse(id=call.id, response=tool_result) else: - self.ten_env.log_error(f"Tool call failed") - return ToolCallResponse(id=call.id, error=f"Tool call failed with status code {result.get_status_code()}") + self.ten_env.log_error("Tool call failed") + return ToolCallResponse( + id=call.id, + error=f"Tool call failed with 
status code {result.get_status_code()}", + ) async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: data_name = data.get_name() - ten_env.log_info("on_data name {}".format(data_name)) + ten_env.log_info(f"on_data name {data_name}") is_final = False input_text = "" try: is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) except Exception as err: - ten_env.log_info(f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") + ten_env.log_info( + f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" + ) try: input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) except Exception as err: - ten_env.log_info(f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") + ten_env.log_info( + f"GetProperty optional {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" + ) if not is_final: ten_env.log_info("ignore non-final input") @@ -368,23 +427,28 @@ async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: ten_env.log_info(f"OnData input text: [{input_text}]") # Start an asynchronous task for handling chat completion - message = LLMChatCompletionUserMessageParam( - role="user", content=input_text) + message = LLMChatCompletionUserMessageParam(role="user", content=input_text) await self.queue_input_item(False, messages=[message]) - async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None: + async def on_audio_frame( + self, ten_env: AsyncTenEnv, audio_frame: AudioFrame + ) -> None: pass - async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None: + async def on_video_frame( + self, ten_env: AsyncTenEnv, video_frame: VideoFrame + ) -> None: pass async def _send_text(self, text: str) -> None: data = Data.create("text_data") data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, text) data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_END_OF_SEGMENT, True) - self.ten_env.send_data(data) + asyncio.create_task(self.ten_env.send_data(data)) - async def _stream_chat(self, messages: List[Any], tools: List[Any]) -> AsyncGenerator[dict, None]: + async def _stream_chat( + self, messages: List[Any], tools: List[Any] + ) -> AsyncGenerator[dict, None]: async with aiohttp.ClientSession() as session: try: payload = { @@ -394,32 +458,34 @@ async def _stream_chat(self, messages: List[Any], tools: List[Any]) -> AsyncGene "model": "gpt-3.5-turbo", "stream": True, "stream_options": {"include_usage": True}, - "ssml_enabled": self.config.ssml_enabled + "ssml_enabled": self.config.ssml_enabled, } if self.config.context_enabled: - payload["context"] = { - **self.config.extra_context - } + payload["context"] = {**self.config.extra_context} self.ten_env.log_info(f"payload before sending: {json.dumps(payload)}") headers = { "Authorization": f"Bearer {self.config.token}", - "Content-Type": "application/json" + "Content-Type": "application/json", } - + start_time = time.time() - async with session.post(self.config.api_url, json=payload, headers=headers) as response: + async with session.post( + self.config.api_url, json=payload, headers=headers + ) as response: if response.status != 200: r = await response.json() - self.ten_env.log_error(f"Received unexpected status {r} from the server.") + self.ten_env.log_error( + f"Received unexpected status {r} from the server." 
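_stream_chat above reads the HTTP response line by line, keeps only `data:` lines, and stops at the `[DONE]` sentinel. The parsing rule on its own, assuming the same SSE framing (names are illustrative):

```python
import json
from typing import Iterable, Iterator

def iter_sse_json(lines: Iterable[bytes]) -> Iterator[dict]:
    """Decode 'data: {...}' server-sent-event lines until [DONE]."""
    for raw in lines:
        line = raw.decode("utf-8").strip()
        if not line.startswith("data:"):
            continue  # ignore comments, blank keep-alives, other fields
        payload = line[5:].strip()
        if payload == "[DONE]":
            break
        yield json.loads(payload)

events = list(iter_sse_json([b'data: {"ok": 1}', b"data: [DONE]"]))
assert events == [{"ok": 1}]
```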
+ ) if self.config.failure_info: await self._send_text(self.config.failure_info) return end_time = time.time() self.connect_times.append(end_time - start_time) - + async for line in response.content: if line: - l = line.decode('utf-8').strip() + l = line.decode("utf-8").strip() if l.startswith("data:"): content = l[5:].strip() if content == "[DONE]": @@ -432,50 +498,75 @@ async def _stream_chat(self, messages: List[Any], tools: List[Any]) -> AsyncGene finally: await session.close() session = None - + async def _update_usage(self, usage: LLMUsage) -> None: if not self.config.rtm_enabled: return - + self.total_usage.completion_tokens += usage.completion_tokens self.total_usage.prompt_tokens += usage.prompt_tokens self.total_usage.total_tokens += usage.total_tokens - + if self.total_usage.completion_tokens_details is None: self.total_usage.completion_tokens_details = LLMCompletionTokensDetails() if self.total_usage.prompt_tokens_details is None: self.total_usage.prompt_tokens_details = LLMPromptTokensDetails() - + if usage.completion_tokens_details: - self.total_usage.completion_tokens_details.accepted_prediction_tokens += usage.completion_tokens_details.accepted_prediction_tokens - self.total_usage.completion_tokens_details.audio_tokens += usage.completion_tokens_details.audio_tokens - self.total_usage.completion_tokens_details.reasoning_tokens += usage.completion_tokens_details.reasoning_tokens - self.total_usage.completion_tokens_details.rejected_prediction_tokens += usage.completion_tokens_details.rejected_prediction_tokens - + self.total_usage.completion_tokens_details.accepted_prediction_tokens += ( + usage.completion_tokens_details.accepted_prediction_tokens + ) + self.total_usage.completion_tokens_details.audio_tokens += ( + usage.completion_tokens_details.audio_tokens + ) + self.total_usage.completion_tokens_details.reasoning_tokens += ( + usage.completion_tokens_details.reasoning_tokens + ) + self.total_usage.completion_tokens_details.rejected_prediction_tokens += ( + usage.completion_tokens_details.rejected_prediction_tokens + ) + if usage.prompt_tokens_details: - self.total_usage.prompt_tokens_details.audio_tokens += usage.prompt_tokens_details.audio_tokens - self.total_usage.prompt_tokens_details.cached_tokens += usage.prompt_tokens_details.cached_tokens - + self.total_usage.prompt_tokens_details.audio_tokens += ( + usage.prompt_tokens_details.audio_tokens + ) + self.total_usage.prompt_tokens_details.cached_tokens += ( + usage.prompt_tokens_details.cached_tokens + ) + self.ten_env.log_info(f"total usage: {self.total_usage}") data = Data.create("llm_stat") data.set_property_from_json("usage", json.dumps(self.total_usage.model_dump())) if self.connect_times and self.completion_times and self.first_token_times: - data.set_property_from_json("latency", json.dumps({ - "connection_latency_95": np.percentile(self.connect_times, 95), - "completion_latency_95": np.percentile(self.completion_times, 95), - "first_token_latency_95": np.percentile(self.first_token_times, 95), - "connection_latency_99": np.percentile(self.connect_times, 99), - "completion_latency_99": np.percentile(self.completion_times, 99), - "first_token_latency_99": np.percentile(self.first_token_times, 99) - })) - self.ten_env.send_data(data) + data.set_property_from_json( + "latency", + json.dumps( + { + "connection_latency_95": np.percentile(self.connect_times, 95), + "completion_latency_95": np.percentile( + self.completion_times, 95 + ), + "first_token_latency_95": np.percentile( + self.first_token_times, 95 + ), + 
"connection_latency_99": np.percentile(self.connect_times, 99), + "completion_latency_99": np.percentile( + self.completion_times, 99 + ), + "first_token_latency_99": np.percentile( + self.first_token_times, 99 + ), + } + ), + ) + asyncio.create_task(self.ten_env.send_data(data)) async def _on_memory_appended(self, message: dict) -> None: self.ten_env.log_info(f"Memory appended: {message}") if not self.config.enable_storage: return - + role = message.get("role") stream_id = self.remote_stream_id if role == "user" else 0 try: @@ -483,6 +574,6 @@ async def _on_memory_appended(self, message: dict) -> None: d.set_property_string("text", message.get("content")) d.set_property_string("role", role) d.set_property_int("stream_id", stream_id) - self.ten_env.send_data(d) + asyncio.create_task(self.ten_env.send_data(d)) except Exception as e: self.ten_env.log_error(f"Error send append_context data {message} {e}") diff --git a/agents/ten_packages/extension/glue_python_async/manifest.json b/agents/ten_packages/extension/glue_python_async/manifest.json index a396372b..d3331ecb 100644 --- a/agents/ten_packages/extension/glue_python_async/manifest.json +++ b/agents/ten_packages/extension/glue_python_async/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "glue_python_async", - "version": "0.3.1", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/http_server_python/__init__.py b/agents/ten_packages/extension/http_server_python/__init__.py index 0bcc331c..f24311c3 100644 --- a/agents/ten_packages/extension/http_server_python/__init__.py +++ b/agents/ten_packages/extension/http_server_python/__init__.py @@ -1,4 +1 @@ from . 
import http_server_addon
-from .log import logger
-
-logger.info("http_server_python extension loaded")
diff --git a/agents/ten_packages/extension/http_server_python/http_server_addon.py b/agents/ten_packages/extension/http_server_python/http_server_addon.py
index 97f269aa..49fa3f14 100644
--- a/agents/ten_packages/extension/http_server_python/http_server_addon.py
+++ b/agents/ten_packages/extension/http_server_python/http_server_addon.py
@@ -8,7 +8,6 @@
 @register_addon_as_extension("http_server_python")
 class HTTPServerExtensionAddon(Addon):
     def on_create_instance(self, ten: TenEnv, addon_name: str, context):
-        from .log import logger
         from .http_server_extension import HTTPServerExtension
-        logger.info("on_create_instance")
+        ten.log_info("on_create_instance")
         ten.on_create_instance_done(HTTPServerExtension(addon_name), context)
diff --git a/agents/ten_packages/extension/http_server_python/http_server_extension.py b/agents/ten_packages/extension/http_server_python/http_server_extension.py
index 4148c7c9..6b44fd5b 100644
--- a/agents/ten_packages/extension/http_server_python/http_server_extension.py
+++ b/agents/ten_packages/extension/http_server_python/http_server_extension.py
@@ -5,7 +5,6 @@
     StatusCode,
     CmdResult,
 )
-from .log import logger
 from http.server import HTTPServer, BaseHTTPRequestHandler
 import threading
 from functools import partial
@@ -13,31 +12,31 @@

 class HTTPHandler(BaseHTTPRequestHandler):
     def __init__(self, ten, *args, directory=None, **kwargs):
-        logger.info("new handler: %s %s %s", directory, args, kwargs)
+        ten.log_info(f"new handler: {directory} {args} {kwargs}")
         self.ten = ten
         super().__init__(*args, **kwargs)

     def do_POST(self):
-        logger.info("post request incoming %s", self.path)
+        self.ten.log_info(f"post request incoming {self.path}")
         if self.path == "/cmd":
             try:
                 content_length = int(self.headers["Content-Length"])
-                input = self.rfile.read(content_length).decode("utf-8")
-                logger.info("incoming request %s", input)
+                input_file = self.rfile.read(content_length).decode("utf-8")
+                self.ten.log_info(f"incoming request {input_file}")
                 self.ten.send_cmd(
-                    Cmd.create_from_json(input),
-                    lambda ten, result: logger.info(
-                        "finish send_cmd from http server %s %s", input, result
+                    Cmd.create_from_json(input_file),
+                    lambda ten, result: ten.log_info(
+                        f"finish send_cmd from http server {input_file} {result}"
                     ),
                 )
                 self.send_response_only(200)
                 self.end_headers()
             except Exception as e:
-                logger.warning("failed to handle request, err {}".format(e))
+                self.ten.log_warn(f"failed to handle request, err {e}")
                 self.send_response_only(500)
                 self.end_headers()
         else:
-            logger.warning("invalid path: %s", self.path)
+            self.ten.log_warn(f"invalid path: {self.path}")
             self.send_response_only(404)
             self.end_headers()

@@ -54,13 +53,12 @@ def __init__(self, name: str):
     def on_start(self, ten: TenEnv):
         self.listen_addr = ten.get_property_string("listen_addr")
         self.listen_port = ten.get_property_int("listen_port")
-        """
-        white_list = ten.get_property_string("cmd_white_list")
-        if len(white_list) > 0:
-            self.cmd_white_list = white_list.split(",")
-        """
-        logger.info(
+        # white_list = ten.get_property_string("cmd_white_list")
+        # if len(white_list) > 0:
+        #     self.cmd_white_list = white_list.split(",")
+
+        ten.log_info(
             "HTTPServerExtension on_start %s:%d, %s",
             self.listen_addr,
             self.listen_port,
@@ -76,14 +74,14 @@ def on_start(self, ten: TenEnv):
         ten.on_start_done()

     def on_stop(self, ten: TenEnv):
-        logger.info("on_stop")
+        ten.log_info("on_stop")
         self.server.shutdown()
self.thread.join() ten.on_stop_done() def on_cmd(self, ten: TenEnv, cmd: Cmd): cmd_json = cmd.to_json() - logger.info("on_cmd json: " + cmd_json) + ten.log_info(f"on_cmd json: {cmd_json}") cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "ok") ten.return_result(cmd_result, cmd) diff --git a/agents/ten_packages/extension/http_server_python/log.py b/agents/ten_packages/extension/http_server_python/log.py deleted file mode 100644 index a7b47d79..00000000 --- a/agents/ten_packages/extension/http_server_python/log.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -logger = logging.getLogger("http_server_python") -logger.setLevel(logging.INFO) - -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" -) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/http_server_python/manifest.json b/agents/ten_packages/extension/http_server_python/manifest.json index 913f3a1a..d0241346 100644 --- a/agents/ten_packages/extension/http_server_python/manifest.json +++ b/agents/ten_packages/extension/http_server_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "http_server_python", - "version": "0.5.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/interrupt_detector/manifest.json b/agents/ten_packages/extension/interrupt_detector/manifest.json index 5c2cd8e8..5c87b632 100644 --- a/agents/ten_packages/extension/interrupt_detector/manifest.json +++ b/agents/ten_packages/extension/interrupt_detector/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "interrupt_detector", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_go", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/interrupt_detector_python/extension.py b/agents/ten_packages/extension/interrupt_detector_python/extension.py index 7ddb310e..a01cdc8a 100644 --- a/agents/ten_packages/extension/interrupt_detector_python/extension.py +++ b/agents/ten_packages/extension/interrupt_detector_python/extension.py @@ -20,6 +20,7 @@ TEXT_DATA_TEXT_FIELD = "text" TEXT_DATA_FINAL_FIELD = "is_final" + class InterruptDetectorExtension(Extension): def on_start(self, ten: TenEnv) -> None: ten.log_info("on_start") @@ -33,7 +34,7 @@ def send_flush_cmd(self, ten: TenEnv) -> None: flush_cmd = Cmd.create(CMD_NAME_FLUSH) ten.send_cmd( flush_cmd, - lambda ten, result: ten.log_info("send_cmd done"), + lambda ten, result, _: ten.log_info("send_cmd done"), ) ten.log_info(f"sent cmd: {CMD_NAME_FLUSH}") @@ -46,8 +47,9 @@ def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: self.send_flush_cmd(ten) # then forward the cmd to downstream - cmd_json = cmd.to_json() - new_cmd = Cmd.create_from_json(cmd_json) + cmd_json = cmd.get_property_to_json() + new_cmd = Cmd.create(cmd_name) + new_cmd.set_property_from_json(None, cmd_json) ten.send_cmd( new_cmd, lambda ten, result: ten.log_info("send_cmd done"), @@ -64,7 +66,7 @@ def on_data(self, ten: TenEnv, data: Data) -> None: example: {name: text_data, properties: {text: "hello", is_final: false} """ - ten.log_info(f"on_data") + ten.log_info("on_data") try: text = data.get_property_string(TEXT_DATA_TEXT_FIELD) diff --git 
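The http_server hunks keep the stdlib HTTPServer on a background thread and tear it down in on_stop via shutdown() followed by join(). A minimal standalone version of that lifecycle (handler and addresses are illustrative):

```python
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer

class Handler(BaseHTTPRequestHandler):
    def do_POST(self):
        self.send_response_only(200)  # acknowledge, as the extension does for /cmd
        self.end_headers()

server = HTTPServer(("127.0.0.1", 0), Handler)  # port 0: pick any free port
thread = threading.Thread(target=server.serve_forever)
thread.start()
# ... serve until the extension stops ...
server.shutdown()  # unblocks serve_forever
thread.join()
```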
a/agents/ten_packages/extension/interrupt_detector_python/manifest.json b/agents/ten_packages/extension/interrupt_detector_python/manifest.json index 92b70555..2579f8e4 100644 --- a/agents/ten_packages/extension/interrupt_detector_python/manifest.json +++ b/agents/ten_packages/extension/interrupt_detector_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "interrupt_detector_python", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/llama_index_chat_engine/__init__.py b/agents/ten_packages/extension/llama_index_chat_engine/__init__.py index 55408286..f3c731cd 100644 --- a/agents/ten_packages/extension/llama_index_chat_engine/__init__.py +++ b/agents/ten_packages/extension/llama_index_chat_engine/__init__.py @@ -1,4 +1 @@ from . import addon -from .log import logger - -logger.info("llama_index_chat_engine extension loaded") diff --git a/agents/ten_packages/extension/llama_index_chat_engine/addon.py b/agents/ten_packages/extension/llama_index_chat_engine/addon.py index e127870d..9ef17901 100644 --- a/agents/ten_packages/extension/llama_index_chat_engine/addon.py +++ b/agents/ten_packages/extension/llama_index_chat_engine/addon.py @@ -1,10 +1,14 @@ -from ten import Addon, register_addon_as_extension, TenEnv +from ten import ( + Addon, + register_addon_as_extension, + TenEnv, +) @register_addon_as_extension("llama_index_chat_engine") -class LlamaIndexExtensionAddon(Addon): - def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: +class LlamaIndexChatEngineExtensionAddon(Addon): + def on_create_instance(self, ten: TenEnv, addon_name: str, context): from .extension import LlamaIndexExtension - from .log import logger - logger.info("on_create_instance") + + ten.log_info("on_create_instance") ten.on_create_instance_done(LlamaIndexExtension(addon_name), context) diff --git a/agents/ten_packages/extension/llama_index_chat_engine/extension.py b/agents/ten_packages/extension/llama_index_chat_engine/extension.py index c28871b1..8ad2f4b8 100644 --- a/agents/ten_packages/extension/llama_index_chat_engine/extension.py +++ b/agents/ten_packages/extension/llama_index_chat_engine/extension.py @@ -13,7 +13,6 @@ StatusCode, CmdResult, ) -from .log import logger import queue, threading from datetime import datetime @@ -44,29 +43,27 @@ def _send_text_data(self, ten: TenEnv, text: str, end_of_segment: bool): output_data.set_property_string("text", text) output_data.set_property_bool("end_of_segment", end_of_segment) ten.send_data(output_data) - logger.info("text [{}] end_of_segment {} sent".format(text, end_of_segment)) + ten.log_info(f"text [{text}] end_of_segment {end_of_segment} sent") except Exception as err: - logger.info( - "text [{}] end_of_segment {} send failed, err {}".format( - text, end_of_segment, err - ) + ten.log_info( + f"text [{text}] end_of_segment {end_of_segment} send failed, err {err}" ) def on_start(self, ten: TenEnv) -> None: - logger.info("on_start") + ten.log_info("on_start") greeting = None try: greeting = ten.get_property_string(PROPERTY_GREETING) except Exception as err: - logger.warning(f"get {PROPERTY_GREETING} property failed, err: {err}") + ten.log_warn(f"get {PROPERTY_GREETING} property failed, err: {err}") try: self.chat_memory_token_limit = ten.get_property_int( PROPERTY_CHAT_MEMORY_TOKEN_LIMIT ) except Exception as err: - logger.warning( + ten.log_warn( f"get 
{PROPERTY_CHAT_MEMORY_TOKEN_LIMIT} property failed, err: {err}"
             )

@@ -76,6 +73,7 @@ def on_start(self, ten: TenEnv) -> None:
         # enable chat memory
         from llama_index.core.storage.chat_store import SimpleChatStore
         from llama_index.core.memory import ChatMemoryBuffer
+
         self.chat_memory = ChatMemoryBuffer.from_defaults(
             token_limit=self.chat_memory_token_limit,
             chat_store=SimpleChatStore(),
@@ -88,7 +86,7 @@ def on_start(self, ten: TenEnv) -> None:
         ten.on_start_done()

     def on_stop(self, ten: TenEnv) -> None:
-        logger.info("on_stop")
+        ten.log_info("on_stop")
         self.stop = True
         self.flush()
@@ -103,23 +101,19 @@ def on_stop(self, ten: TenEnv) -> None:

     def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None:
         cmd_name = cmd.get_name()
-        logger.info("on_cmd {}".format(cmd_name))
+        ten.log_info(f"on_cmd {cmd_name}")
         if cmd_name == "file_chunked":
             coll = cmd.get_property_string("collection")
             # only update selected collection if empty
             if len(self.collection_name) == 0:
-                logger.info(
-                    "collection for querying has been updated from {} to {}".format(
-                        self.collection_name, coll
-                    )
+                ten.log_info(
+                    f"collection for querying has been updated from {self.collection_name} to {coll}"
                 )
                 self.collection_name = coll
             else:
-                logger.info(
-                    "new collection {} incoming but won't change current collection_name {}".format(
-                        coll, self.collection_name
-                    )
+                ten.log_info(
+                    f"new collection {coll} incoming but won't change current collection_name {self.collection_name}"
                 )

             # notify user
@@ -135,10 +129,8 @@ def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None:
             self.queue.put((file_chunk_text, datetime.now(), TASK_TYPE_GREETING))
         elif cmd_name == "update_querying_collection":
             coll = cmd.get_property_string("collection")
-            logger.info(
-                "collection for querying has been updated from {} to {}".format(
-                    self.collection_name, coll
-                )
+            ten.log_info(
+                f"collection for querying has been updated from {self.collection_name} to {coll}"
             )
             self.collection_name = coll

@@ -164,21 +156,21 @@ def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None:
     def on_data(self, ten: TenEnv, data: Data) -> None:
         is_final = data.get_property_bool("is_final")
         if not is_final:
-            logger.info("on_data ignore non final")
+            ten.log_info("on_data ignore non final")
             return

         inputText = data.get_property_string("text")
         if len(inputText) == 0:
-            logger.info("on_data ignore empty text")
+            ten.log_info("on_data ignore empty text")
             return

         ts = datetime.now()

-        logger.info("on_data text [%s], ts [%s]", inputText, ts)
+        ten.log_info(f"on_data text [{inputText}], ts [{ts}]")
         self.queue.put((inputText, ts, TASK_TYPE_CHAT_REQUEST))

     def async_handle(self, ten: TenEnv):
-        logger.info("async_handle started")
+        ten.log_info("async_handle started")
         while not self.stop:
             try:
                 value = self.queue.get()
@@ -187,10 +179,8 @@ def async_handle(self, ten: TenEnv):
                 input_text, ts, task_type = value

                 if ts < self.get_outdated_ts():
-                    logger.info(
-                        "text [{}] ts [{}] task_type [{}] dropped due to outdated".format(
-                            input_text, ts, task_type
-                        )
+                    ten.log_info(
+                        f"text [{input_text}] ts [{ts}] task_type [{task_type}] dropped due to outdated"
                     )
                     continue

@@ -199,7 +189,7 @@ def async_handle(self, ten: TenEnv):
                     self._send_text_data(ten, input_text, True)
                     continue

-                logger.info("process input text [%s] ts [%s]", input_text, ts)
+                ten.log_info(f"process input text [{input_text}] ts [{ts}]")

                 # lazy import packages which requires long time to load
                 from .llama_llm import LlamaLLM
@@ -209,6 +199,7 @@ def async_handle(self, ten: TenEnv):
                 chat_engine = None
                 if len(self.collection_name) > 0:
                     from 
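async_handle above drains a work queue and discards any item whose timestamp predates the last flush, so interrupted requests never reach the chat engine. The drop rule in isolation (names and item shape are illustrative):

```python
import queue
from datetime import datetime

def drain_fresh(q: queue.Queue, outdated_ts: datetime) -> list:
    """Keep only items queued after the last flush, like async_handle above."""
    fresh = []
    while not q.empty():
        text, ts = q.get()
        if ts < outdated_ts:
            continue  # dropped: superseded by an interrupt/flush
        fresh.append(text)
    return fresh
```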
llama_index.core.chat_engine import ContextChatEngine
+
                     chat_engine = ContextChatEngine.from_defaults(
                         llm=LlamaLLM(ten=ten),
                         retriever=LlamaRetriever(ten=ten, coll=self.collection_name),
@@ -232,6 +223,7 @@ def async_handle(self, ten: TenEnv):
                     )
                 else:
                     from llama_index.core.chat_engine import SimpleChatEngine
+
                     chat_engine = SimpleChatEngine.from_defaults(
                         llm=LlamaLLM(ten=ten),
                         system_prompt=(
@@ -250,7 +242,7 @@ def async_handle(self, ten: TenEnv):
                         if self.stop:
                             break
                         if ts < self.get_outdated_ts():
-                            logger.info(
-                                "stream_chat coming responses dropped due to outdated for input text [%s] ts [%s] ",
-                                input_text,
-                                ts,
+                            ten.log_info(
+                                f"stream_chat coming responses dropped due to outdated for input text [{input_text}] ts [{ts}]"
                             )
@@ -264,8 +256,8 @@ def async_handle(self, ten: TenEnv):
                 # send out end_of_segment
                 self._send_text_data(ten, "", True)
             except Exception as e:
-                logger.exception(e)
-        logger.info("async_handle stoped")
+                ten.log_error(str(e))
+        ten.log_info("async_handle stopped")

     def flush(self):
         with self.outdate_ts_lock:
@@ -277,3 +269,4 @@ def flush(self):

     def get_outdated_ts(self):
         with self.outdate_ts_lock:
             return self.outdate_ts
+
diff --git a/agents/ten_packages/extension/llama_index_chat_engine/llama_embedding.py b/agents/ten_packages/extension/llama_index_chat_engine/llama_embedding.py
index 7ed928ff..dd125511 100644
--- a/agents/ten_packages/extension/llama_index_chat_engine/llama_embedding.py
+++ b/agents/ten_packages/extension/llama_index_chat_engine/llama_embedding.py
@@ -1,11 +1,11 @@
 from typing import Any, List
 import threading
 from llama_index.core.embeddings import BaseEmbedding
-from .log import logger
 import json
 from ten import (
     Cmd,
     CmdResult,
+    TenEnv,
 )

 EMBED_CMD = "embed"
@@ -19,7 +19,7 @@ def embed_from_resp(cmd_result: CmdResult) -> List[float]:
 class LlamaEmbedding(BaseEmbedding):
     ten: Any

-    def __init__(self, ten):
+    def __init__(self, ten: TenEnv):
         """Creates a new Llama embedding interface."""
         super().__init__()
         self.ten = ten
@@ -35,9 +35,7 @@ async def _aget_text_embedding(self, text: str) -> List[float]:
         return self._get_text_embedding(text)

     def _get_query_embedding(self, query: str) -> List[float]:
-        logger.info(
-            "LlamaEmbedding generate embeddings for the query: {}".format(query)
-        )
+        self.ten.log_info(f"LlamaEmbedding generate embeddings for the query: {query}")
         wait_event = threading.Event()
         resp: List[float]
@@ -45,7 +43,7 @@ def callback(_, result):
             nonlocal resp
             nonlocal wait_event

-            logger.debug("LlamaEmbedding embedding received")
+            self.ten.log_debug("LlamaEmbedding embedding received")
             resp = embed_from_resp(result)
             wait_event.set()

@@ -61,5 +59,5 @@ def _get_text_embedding(self, text: str) -> List[float]:

     # for texts embedding, will not be called in this module
     def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
-        logger.warning("not implemented")
+        self.ten.log_warn("not implemented")
         return []
diff --git a/agents/ten_packages/extension/llama_index_chat_engine/llama_llm.py b/agents/ten_packages/extension/llama_index_chat_engine/llama_llm.py
index 9a5f83e9..785f903b 100644
--- a/agents/ten_packages/extension/llama_index_chat_engine/llama_llm.py
+++ b/agents/ten_packages/extension/llama_index_chat_engine/llama_llm.py
@@ -15,11 +15,10 @@
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
 from llama_index.core.llms.custom import CustomLLM

-from .log import logger
-from ten import Cmd, StatusCode, CmdResult
+from ten import Cmd, StatusCode, CmdResult, TenEnv


-def chat_from_llama_response(cmd_result: CmdResult) -> ChatResponse:
+def chat_from_llama_response(cmd_result: CmdResult) -> 
ChatResponse | None: status = cmd_result.get_status_code() if status != StatusCode.OK: return None @@ -39,7 +38,7 @@ def _messages_str_from_chat_messages(messages: Sequence[ChatMessage]) -> str: class LlamaLLM(CustomLLM): ten: Any - def __init__(self, ten): + def __init__(self, ten: TenEnv): """Creates a new Llama model interface.""" super().__init__() self.ten = ten @@ -47,7 +46,6 @@ def __init__(self, ten): @property def metadata(self) -> LLMMetadata: return LLMMetadata( - # TODO: fix metadata context_window=1024, num_output=512, model_name="llama_llm", @@ -56,13 +54,13 @@ def metadata(self) -> LLMMetadata: @llm_chat_callback() def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: - logger.debug("LlamaLLM chat start") + self.ten.log_debug("LlamaLLM chat start") resp: ChatResponse wait_event = threading.Event() def callback(_, result): - logger.debug("LlamaLLM chat callback done") + self.ten.log_debug("LlamaLLM chat callback done") nonlocal resp nonlocal wait_event resp = chat_from_llama_response(result) @@ -73,11 +71,7 @@ def callback(_, result): cmd = Cmd.create("call_chat") cmd.set_property_string("messages", messages_str) cmd.set_property_bool("stream", False) - logger.info( - "LlamaLLM chat send_cmd {}, messages {}".format( - cmd.get_name(), messages_str - ) - ) + self.ten.log_info(f"LlamaLLM chat send_cmd {cmd.get_name()}, messages {messages_str}") self.ten.send_cmd(cmd, callback) wait_event.wait() @@ -87,13 +81,13 @@ def callback(_, result): def complete( self, prompt: str, formatted: bool = False, **kwargs: Any ) -> CompletionResponse: - logger.warning("LlamaLLM complete hasn't been implemented yet") + raise NotImplementedError("LlamaLLM complete hasn't been implemented yet") @llm_chat_callback() def stream_chat( self, messages: Sequence[ChatMessage], **kwargs: Any ) -> ChatResponseGen: - logger.debug("LlamaLLM stream_chat start") + self.ten.log_debug("LlamaLLM stream_chat start") cur_tokens = "" resp_queue = queue.Queue() @@ -115,12 +109,12 @@ def callback(_, result): status = result.get_status_code() if status != StatusCode.OK: - logger.warn("LlamaLLM stream_chat callback status {}".format(status)) + self.ten.log_warn(f"LlamaLLM stream_chat callback status {status}") resp_queue.put(None) return cur_tokens = result.get_property_string("text") - logger.debug("LlamaLLM stream_chat callback text [{}]".format(cur_tokens)) + self.ten.log_debug(f"LlamaLLM stream_chat callback text [{cur_tokens}]") resp_queue.put(cur_tokens) if result.get_is_final(): resp_queue.put(None) @@ -130,10 +124,8 @@ def callback(_, result): cmd = Cmd.create("call_chat") cmd.set_property_string("messages", messages_str) cmd.set_property_bool("stream", True) - logger.info( - "LlamaLLM stream_chat send_cmd {}, messages {}".format( - cmd.get_name(), messages_str - ) + self.ten.log_info( + f"LlamaLLM stream_chat send_cmd {cmd.get_name()}, messages {messages_str}" ) self.ten.send_cmd(cmd, callback) return gen() @@ -141,7 +133,9 @@ def callback(_, result): def stream_complete( self, prompt: str, formatted: bool = False, **kwargs: Any ) -> CompletionResponseGen: - logger.warning("LlamaLLM stream_complete hasn't been implemented yet") + raise NotImplementedError( + "LlamaLLM stream_complete hasn't been implemented yet" + ) @classmethod def class_name(cls) -> str: diff --git a/agents/ten_packages/extension/llama_index_chat_engine/llama_retriever.py b/agents/ten_packages/extension/llama_index_chat_engine/llama_retriever.py index 5163f533..7f75e7ae 100644 --- 
a/agents/ten_packages/extension/llama_index_chat_engine/llama_retriever.py +++ b/agents/ten_packages/extension/llama_index_chat_engine/llama_retriever.py @@ -1,10 +1,9 @@ -import time, json, threading +import json, threading from typing import Any, List from llama_index.core.schema import QueryBundle, TextNode from llama_index.core.schema import NodeWithScore from llama_index.core.retrievers import BaseRetriever -from .log import logger from .llama_embedding import LlamaEmbedding from ten import ( TenEnv, @@ -14,13 +13,13 @@ ) -def format_node_result(cmd_result: CmdResult) -> List[NodeWithScore]: - logger.info("LlamaRetriever retrieve response {}".format(cmd_result.to_json())) +def format_node_result(ten: TenEnv, cmd_result: CmdResult) -> List[NodeWithScore]: + ten.log_info(f"LlamaRetriever retrieve response {cmd_result.to_json()}") status = cmd_result.get_status_code() try: contents_json = cmd_result.get_property_to_json("response") except Exception as e: - logger.warning(f"Failed to get response from cmd_result: {e}") + ten.log_warn(f"Failed to get response from cmd_result: {e}") return [ NodeWithScore( node=TextNode(), @@ -56,10 +55,10 @@ def __init__(self, ten: TenEnv, coll: str): self.embed_model = LlamaEmbedding(ten=ten) self.collection_name = coll except Exception as e: - logger.error(f"Failed to initialize LlamaRetriever: {e}") + ten.log_error(f"Failed to initialize LlamaRetriever: {e}") def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: - logger.info("LlamaRetriever retrieve: {}".format(query_bundle.to_json)) + self.ten.log_info(f"LlamaRetriever retrieve: {query_bundle.to_json}") wait_event = threading.Event() resp: List[NodeWithScore] = [] @@ -67,20 +66,18 @@ def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: def cmd_callback(_, result): nonlocal resp nonlocal wait_event - resp = format_node_result(result) + resp = format_node_result(self.ten, result) wait_event.set() - logger.debug("LlamaRetriever callback done") + self.ten.log_debug("LlamaRetriever callback done") embedding = self.embed_model.get_query_embedding(query=query_bundle.query_str) query_cmd = Cmd.create("query_vector") query_cmd.set_property_string("collection_name", self.collection_name) - query_cmd.set_property_int("top_k", 3) # TODO: configable + query_cmd.set_property_int("top_k", 3) query_cmd.set_property_from_json("embedding", json.dumps(embedding)) - logger.info( - "LlamaRetriever send_cmd, collection_name: {}, embedding len: {}".format( - self.collection_name, len(embedding) - ) + self.ten.log_info( + f"LlamaRetriever send_cmd, collection_name: {self.collection_name}, embedding len: {len(embedding)}" ) self.ten.send_cmd(query_cmd, cmd_callback) diff --git a/agents/ten_packages/extension/llama_index_chat_engine/log.py b/agents/ten_packages/extension/llama_index_chat_engine/log.py deleted file mode 100644 index 0804a279..00000000 --- a/agents/ten_packages/extension/llama_index_chat_engine/log.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -logger = logging.getLogger("llama_index_chat_engine") -logger.setLevel(logging.INFO) - -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" -) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/llama_index_chat_engine/manifest.json b/agents/ten_packages/extension/llama_index_chat_engine/manifest.json index 08a0c643..5fe25a42 100644 --- 
a/agents/ten_packages/extension/llama_index_chat_engine/manifest.json +++ b/agents/ten_packages/extension/llama_index_chat_engine/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "llama_index_chat_engine", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/message_collector/BUILD.gn b/agents/ten_packages/extension/message_collector/BUILD.gn index 555cf959..82bdfae9 100644 --- a/agents/ten_packages/extension/message_collector/BUILD.gn +++ b/agents/ten_packages/extension/message_collector/BUILD.gn @@ -17,6 +17,5 @@ ten_package("message_collector") { "src/__init__.py", "src/addon.py", "src/extension.py", - "src/log.py", ] } diff --git a/agents/ten_packages/extension/message_collector/manifest.json b/agents/ten_packages/extension/message_collector/manifest.json index dc9d75d2..835c1721 100644 --- a/agents/ten_packages/extension/message_collector/manifest.json +++ b/agents/ten_packages/extension/message_collector/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/message_collector/src/extension.py b/agents/ten_packages/extension/message_collector/src/extension.py index a6f8f9ac..f12152ab 100644 --- a/agents/ten_packages/extension/message_collector/src/extension.py +++ b/agents/ten_packages/extension/message_collector/src/extension.py @@ -37,23 +37,25 @@ MAX_CHUNK_SIZE_BYTES = 1024 -def _text_to_base64_chunks(text: str, msg_id: str) -> list: +def _text_to_base64_chunks(_: TenEnv, text: str, msg_id: str) -> list: # Ensure msg_id does not exceed 50 characters if len(msg_id) > 36: raise ValueError("msg_id cannot exceed 36 characters.") # Convert text to bytearray - byte_array = bytearray(text, 'utf-8') + byte_array = bytearray(text, "utf-8") # Encode the bytearray into base64 - base64_encoded = base64.b64encode(byte_array).decode('utf-8') + base64_encoded = base64.b64encode(byte_array).decode("utf-8") # Initialize list to hold the final chunks chunks = [] # We'll split the base64 string dynamically based on the final byte size part_index = 0 - total_parts = None # We'll calculate total parts once we know how many chunks we create + total_parts = ( + None # We'll calculate total parts once we know how many chunks we create + ) # Process the base64-encoded content in chunks current_position = 0 @@ -68,21 +70,22 @@ def _text_to_base64_chunks(text: str, msg_id: str) -> list: count = 0 while True: # Create the content part of the chunk - content_chunk = base64_encoded[current_position: - current_position + estimated_chunk_size] + content_chunk = base64_encoded[ + current_position : current_position + estimated_chunk_size + ] # Format the chunk formatted_chunk = f"{msg_id}|{part_index}|{total_parts if total_parts else '???'}|{content_chunk}" # Check if the byte length of the formatted chunk exceeds the max allowed size - if len(bytearray(formatted_chunk, 'utf-8')) <= MAX_CHUNK_SIZE_BYTES: + if len(bytearray(formatted_chunk, "utf-8")) <= MAX_CHUNK_SIZE_BYTES: break else: # Reduce the estimated chunk size if the formatted chunk is too large estimated_chunk_size -= 100 # Reduce content size gradually count += 1 - # logger.debug(f"chunk estimate guess: {count}") + # ten_env.log_debug(f"chunk estimate guess: {count}") # Add the current chunk to the list chunks.append(formatted_chunk) @@ -91,16 +94,16 @@ def 
_text_to_base64_chunks(text: str, msg_id: str) -> list: # Now that we know the total number of parts, update the chunks with correct total_parts total_parts = len(chunks) - updated_chunks = [ - chunk.replace("???", str(total_parts)) for chunk in chunks - ] + updated_chunks = [chunk.replace("???", str(total_parts)) for chunk in chunks] return updated_chunks class MessageCollectorExtension(Extension): - # Create the queue for message processing - queue = asyncio.Queue() + def __init__(self, name: str): + super().__init__(name) + self.queue = asyncio.Queue() + self.loop = None def on_init(self, ten_env: TenEnv) -> None: ten_env.log_info("on_init") @@ -115,6 +118,7 @@ def on_start(self, ten_env: TenEnv) -> None: def start_loop(): asyncio.set_event_loop(self.loop) self.loop.run_forever() + threading.Thread(target=start_loop, args=[]).start() self.loop.create_task(self._process_queue(ten_env)) @@ -165,17 +169,16 @@ def on_data(self, ten_env: TenEnv, data: Data) -> None: try: final = data.get_property_bool(TEXT_DATA_FINAL_FIELD) - except Exception as e: + except Exception: pass try: stream_id = data.get_property_int(TEXT_DATA_STREAM_ID_FIELD) - except Exception as e: + except Exception: pass try: - end_of_segment = data.get_property_bool( - TEXT_DATA_END_OF_SEGMENT_FIELD) + end_of_segment = data.get_property_bool(TEXT_DATA_END_OF_SEGMENT_FIELD) except Exception as e: ten_env.log_warn( f"on_data get_property_bool {TEXT_DATA_END_OF_SEGMENT_FIELD} error: {e}" @@ -212,11 +215,9 @@ def on_data(self, ten_env: TenEnv, data: Data) -> None: } try: - chunks = _text_to_base64_chunks( - json.dumps(base_msg_data), message_id) + chunks = _text_to_base64_chunks(ten_env, json.dumps(base_msg_data), message_id) for chunk in chunks: - asyncio.run_coroutine_threadsafe( - self._queue_message(chunk), self.loop) + asyncio.run_coroutine_threadsafe(self._queue_message(chunk), self.loop) except Exception as e: ten_env.log_warn(f"on_data new_data error: {e}") diff --git a/agents/ten_packages/extension/message_collector_rtm/manifest.json b/agents/ten_packages/extension/message_collector_rtm/manifest.json index 506f2e01..b130f21f 100644 --- a/agents/ten_packages/extension/message_collector_rtm/manifest.json +++ b/agents/ten_packages/extension/message_collector_rtm/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/message_collector_rtm/src/addon.py b/agents/ten_packages/extension/message_collector_rtm/src/addon.py index 1602995b..c7800e34 100644 --- a/agents/ten_packages/extension/message_collector_rtm/src/addon.py +++ b/agents/ten_packages/extension/message_collector_rtm/src/addon.py @@ -13,9 +13,10 @@ @register_addon_as_extension("message_collector_rtm") -class MessageCollectorRTMExtension(Addon): +class MessageCollectorRTMExtensionAddon(Addon): def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: from .extension import MessageCollectorRTMExtension + ten_env.log_info("MessageCollectorRTMExtensionAddon on_create_instance") ten_env.on_create_instance_done(MessageCollectorRTMExtension(name), context) diff --git a/agents/ten_packages/extension/message_collector_rtm/src/extension.py b/agents/ten_packages/extension/message_collector_rtm/src/extension.py index 5b821ed7..d24d1ffd 100644 --- a/agents/ten_packages/extension/message_collector_rtm/src/extension.py +++ b/agents/ten_packages/extension/message_collector_rtm/src/extension.py @@ -26,6 +26,7 @@ TEXT_DATA_STREAM_ID_FIELD = 
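The chunking helper above defines a small framing protocol for the data stream: every message is msg_id|part_index|total_parts|<base64 payload>, with total_parts written as ??? on the first pass and patched in once the final chunk count is known. For reference, a receiver-side sketch under those assumptions (this function is hypothetical and not part of the patch):

    import base64


    def reassemble(chunks: list[str]) -> bytes:
        # Hypothetical receiver for the msg_id|part|total|payload frames
        # produced by _text_to_base64_chunks; assumes 0-based part indices.
        parts: dict[int, str] = {}
        total = 0
        for chunk in chunks:
            _msg_id, index, total_str, payload = chunk.split("|", 3)
            parts[int(index)] = payload
            total = int(total_str)
        assert len(parts) == total, "missing chunks"
        return base64.b64decode("".join(parts[i] for i in range(total)))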
"stream_id" TEXT_DATA_END_OF_SEGMENT_FIELD = "end_of_segment" + class MessageCollectorRTMExtension(AsyncExtension): # Create the queue for message processing def __init__(self, name: str): @@ -35,7 +36,6 @@ def __init__(self, name: str): self.loop = None self.ten_env = None self.stopped = False - async def on_init(self, ten_env: AsyncTenEnv) -> None: ten_env.log_info("MessageCollectorRTMExtension on_init") @@ -61,14 +61,14 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: if cmd_name == "on_user_audio_track_state_changed": await self.handle_user_state_changed(cmd) else: - ten_env.log_warn(f"unsupported cmd {cmd_name}") + ten_env.log_warn(f"unsupported cmd {cmd_name}") cmd_result = CmdResult.create(StatusCode.OK) - ten_env.return_result(cmd_result, cmd) + await ten_env.return_result(cmd_result, cmd) except Exception as e: ten_env.log_error(f"on_cmd error: {e}") cmd_result = CmdResult.create(StatusCode.ERROR) - ten_env.return_result(cmd_result, cmd) + await ten_env.return_result(cmd_result, cmd) async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: """ @@ -89,10 +89,14 @@ async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: else: ten_env.log_warn(f"unsupported data {data_name}") - async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None: + async def on_audio_frame( + self, ten_env: AsyncTenEnv, audio_frame: AudioFrame + ) -> None: pass - async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None: + async def on_video_frame( + self, ten_env: AsyncTenEnv, video_frame: VideoFrame + ) -> None: pass async def on_text_data(self, data: Data) -> None: @@ -110,12 +114,12 @@ async def on_text_data(self, data: Data) -> None: try: final = data.get_property_bool(TEXT_DATA_FINAL_FIELD) - except Exception as e: + except Exception: pass try: stream_id = data.get_property_int(TEXT_DATA_STREAM_ID_FIELD) - except Exception as e: + except Exception: pass try: @@ -156,13 +160,13 @@ async def on_text_data(self, data: Data) -> None: await self._queue_message("text_data", text_data) async def on_rtm_message_event(self, data: Data) -> None: - self.ten_env.log_debug(f"on_data rtm_message_event") + self.ten_env.log_debug("on_data rtm_message_event") try: text = data.get_property_string("message") data = Data.create("text_data") data.set_property_string("text", text) data.set_property_bool("is_final", True) - self.ten_env.send_data(data) + asyncio.create_task(self.ten_env.send_data(data)) except Exception as e: self.ten_env.log_error(f"Failed to handle on_rtm_message_event data: {e}") @@ -208,7 +212,7 @@ async def _process_queue(self): async def _handle_text_data(self, data: dict): try: self.ten_env.log_debug(f"Handling text data: {data}") - json_bytes = json.dumps(data).encode('utf-8') + json_bytes = json.dumps(data).encode("utf-8") cmd = Cmd.create("publish") cmd.set_property_buf("message", json_bytes) cmd_result: CmdResult = await self.ten_env.send_cmd(cmd) diff --git a/agents/ten_packages/extension/minimax_tts/manifest.json b/agents/ten_packages/extension/minimax_tts/manifest.json index 57d10616..0d583283 100644 --- a/agents/ten_packages/extension/minimax_tts/manifest.json +++ b/agents/ten_packages/extension/minimax_tts/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_go", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/minimax_tts/minimax_tts.go b/agents/ten_packages/extension/minimax_tts/minimax_tts.go index 003f5778..3d22005f 100644 --- 
a/agents/ten_packages/extension/minimax_tts/minimax_tts.go +++ b/agents/ten_packages/extension/minimax_tts/minimax_tts.go @@ -15,10 +15,11 @@ import ( "encoding/json" "fmt" "io" - "log/slog" "net/http" "time" + "ten_framework/ten" + "github.com/go-resty/resty/v2" ) @@ -58,8 +59,8 @@ func newMinimaxTTS(config minimaxTTSConfig) (*minimaxTTS, error) { }, nil } -func (e *minimaxTTS) textToSpeechStream(streamWriter io.Writer, text string) (err error) { - slog.Debug("textToSpeechStream start tts", "text", text) +func (e *minimaxTTS) textToSpeechStream(tenEnv ten.TenEnv, streamWriter io.Writer, text string) (err error) { + tenEnv.LogDebug("textToSpeechStream start tts") payload := map[string]any{ "audio_setting": map[string]any{ @@ -89,19 +90,19 @@ func (e *minimaxTTS) textToSpeechStream(streamWriter io.Writer, text string) (er Post(fmt.Sprintf("%s?GroupId=%s", e.config.Url, e.config.GroupId)) if err != nil { - slog.Error("request failed", "err", err, "text", text) + tenEnv.LogError(fmt.Sprintf("request failed, err: %v, text: %s", err, text)) return fmt.Errorf("textToSpeechStream failed, err: %v", err) } defer func() { resp.RawBody().Close() - slog.Debug("textToSpeechStream close response", "err", err, "text", text) + tenEnv.LogDebug(fmt.Sprintf("textToSpeechStream close response, err: %v, text: %s", err, text)) }() // Check the response status code if resp.StatusCode() != http.StatusOK { - slog.Error("unexpected response status", "status", resp.StatusCode()) + tenEnv.LogError(fmt.Sprintf("unexpected response status: %d", resp.StatusCode())) return fmt.Errorf("unexpected response status: %d", resp.StatusCode()) } @@ -113,12 +114,12 @@ func (e *minimaxTTS) textToSpeechStream(streamWriter io.Writer, text string) (er break } - slog.Error("failed to read line", "error", err) + tenEnv.LogError(fmt.Sprintf("failed to read line: %v", err)) return err } if !bytes.HasPrefix(line, []byte("data:")) { - slog.Debug("drop chunk", "text", text, "line", line) + tenEnv.LogDebug(fmt.Sprintf("drop chunk, text: %s, line: %s", text, line)) continue } @@ -135,7 +136,7 @@ func (e *minimaxTTS) textToSpeechStream(streamWriter io.Writer, text string) (er } if err = json.Unmarshal(line[5:], &chunk); err != nil { - slog.Error("failed to decode JSON chunk", "err", err) + tenEnv.LogError(fmt.Sprintf("failed to decode JSON chunk: %v", err)) break } @@ -145,13 +146,13 @@ func (e *minimaxTTS) textToSpeechStream(streamWriter io.Writer, text string) (er audioData, err := hex.DecodeString(chunk.Data.Audio) if err != nil { - slog.Error("failed to decode audio data", "err", err, "traceId", chunk.TraceId, "BaseResp", chunk.BaseResp) + tenEnv.LogError(fmt.Sprintf("failed to decode audio data: %v, traceId: %s, BaseResp: %v", err, chunk.TraceId, chunk.BaseResp)) break } _, err = streamWriter.Write(audioData) if err != nil { - slog.Error("failed to write to streamWriter", "err", err, "traceId", chunk.TraceId, "BaseResp", chunk.BaseResp) + tenEnv.LogError(fmt.Sprintf("failed to write to streamWriter: %v, traceId: %s, BaseResp: %v", err, chunk.TraceId, chunk.BaseResp)) break } } diff --git a/agents/ten_packages/extension/minimax_tts/minimax_tts_extension.go b/agents/ten_packages/extension/minimax_tts/minimax_tts_extension.go index 867bcedb..7bf132c3 100644 --- a/agents/ten_packages/extension/minimax_tts/minimax_tts_extension.go +++ b/agents/ten_packages/extension/minimax_tts/minimax_tts_extension.go @@ -11,7 +11,6 @@ package extension import ( "fmt" "io" - "log/slog" "sync" "sync/atomic" "time" @@ -38,8 +37,6 @@ const ( ) var ( - logTag = 
slog.String("extension", "MINIMAX_TTS_EXTENSION") - outdateTs atomic.Int64 textChan chan *message wg sync.WaitGroup @@ -70,27 +67,27 @@ func newMinimaxTTSExtension(name string) ten.Extension { // - url // - voice_id func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { - slog.Info("OnStart", logTag) + ten.LogInfo("OnStart") // prepare configuration minimaxTTSConfig := defaultMinimaxTTSConfig() if apiKey, err := ten.GetPropertyString(propertyApiKey); err != nil { - slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) + ten.LogError(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err)) return } else { minimaxTTSConfig.ApiKey = apiKey } if groupId, err := ten.GetPropertyString(propertyGroupId); err != nil { - slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyGroupId, err), logTag) + ten.LogError(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyGroupId, err)) return } else { minimaxTTSConfig.GroupId = groupId } if model, err := ten.GetPropertyString(propertyModel); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModel, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModel, err)) } else { if len(model) > 0 { minimaxTTSConfig.Model = model @@ -98,7 +95,7 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { } if requestTimeoutSeconds, err := ten.GetPropertyInt64(propertyRequestTimeoutSeconds); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err)) } else { if requestTimeoutSeconds > 0 { minimaxTTSConfig.RequestTimeoutSeconds = int(requestTimeoutSeconds) @@ -106,7 +103,7 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { } if sampleRate, err := ten.GetPropertyInt64(propertySampleRate); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySampleRate, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySampleRate, err)) } else { if sampleRate > 0 { minimaxTTSConfig.SampleRate = int32(sampleRate) @@ -114,7 +111,7 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { } if url, err := ten.GetPropertyString(propertyUrl); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyUrl, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyUrl, err)) } else { if len(url) > 0 { minimaxTTSConfig.Url = url @@ -122,7 +119,7 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { } if voiceId, err := ten.GetPropertyString(propertyVoiceId); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyVoiceId, err), logTag) + ten.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyVoiceId, err)) } else { minimaxTTSConfig.VoiceId = voiceId } @@ -130,11 +127,11 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { // create minimaxTTS instance minimaxTTS, err := newMinimaxTTS(minimaxTTSConfig) if err != nil { - slog.Error(fmt.Sprintf("newMinimaxTTS failed, err: %v", err), logTag) + ten.LogError(fmt.Sprintf("newMinimaxTTS failed, err: %v", err)) return } - slog.Info(fmt.Sprintf("newMinimaxTTS succeed with Model: %s", minimaxTTSConfig.Model), logTag) + ten.LogInfo(fmt.Sprintf("newMinimaxTTS succeed with Model: %s", minimaxTTSConfig.Model)) // set 
minimaxTTS instance e.minimaxTTS = minimaxTTS @@ -150,17 +147,17 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { textChan = make(chan *message, textChanMax) go func() { - slog.Info("process textChan", logTag) + ten.LogInfo("process textChan") for msg := range textChan { if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt - slog.Info(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", - msg.text, msg.receivedTs, outdateTs.Load()), logTag) + ten.LogInfo(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", + msg.text, msg.receivedTs, outdateTs.Load())) continue } wg.Add(1) - slog.Info(fmt.Sprintf("textChan text: [%s]", msg.text), logTag) + ten.LogInfo(fmt.Sprintf("textChan text: [%s]", msg.text)) r, w := io.Pipe() startTime := time.Now() @@ -169,16 +166,16 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { defer wg.Done() defer w.Close() - slog.Info(fmt.Sprintf("textToSpeechStream text: [%s]", msg.text), logTag) - err = e.minimaxTTS.textToSpeechStream(w, msg.text) - slog.Info(fmt.Sprintf("textToSpeechStream result: [%v]", err), logTag) + ten.LogInfo(fmt.Sprintf("textToSpeechStream text: [%s]", msg.text)) + err = e.minimaxTTS.textToSpeechStream(ten, w, msg.text) + ten.LogInfo(fmt.Sprintf("textToSpeechStream result: [%v]", err)) if err != nil { - slog.Error(fmt.Sprintf("textToSpeechStream failed, err: %v", err), logTag) + ten.LogError(fmt.Sprintf("textToSpeechStream failed, err: %v", err)) return } }() - slog.Info(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize), logTag) + ten.LogInfo(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize)) var ( firstFrameLatency int64 @@ -192,8 +189,8 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { // read pcm stream for { if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt - slog.Info(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", - msg.text, msg.receivedTs, outdateTs.Load()), logTag) + ten.LogInfo(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", + msg.text, msg.receivedTs, outdateTs.Load())) break } @@ -203,16 +200,16 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { if err != nil { if err == io.EOF { - slog.Info("read pcm stream EOF", logTag) + ten.LogInfo("read pcm stream EOF") break } - slog.Error(fmt.Sprintf("read pcm stream failed, err: %v", err), logTag) + ten.LogError(fmt.Sprintf("read pcm stream failed, err: %v", err)) break } if pcmFrameRead != pcmFrameSize { - slog.Debug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead), logTag) + ten.LogDebug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead)) continue } @@ -224,21 +221,21 @@ func (e *minimaxTTSExtension) OnStart(ten ten.TenEnv) { if firstFrameLatency == 0 { firstFrameLatency = time.Since(startTime).Milliseconds() - slog.Info(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency), logTag) + ten.LogInfo(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency)) } - slog.Debug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text), logTag) + ten.LogDebug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text)) } if 
pcmFrameRead > 0 { pcm.send(ten, buf) sentFrames++ - slog.Info(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead), logTag) + ten.LogInfo(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead)) } r.Close() - slog.Info(fmt.Sprintf("send pcm data finished, text: [%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms", - msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds()), logTag) + ten.LogInfo(fmt.Sprintf("send pcm data finished, text: [%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms", + msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds())) } }() @@ -256,13 +253,13 @@ func (e *minimaxTTSExtension) OnCmd( ) { cmdName, err := cmd.GetName() if err != nil { - slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("OnCmd get name failed, err: %v", err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } - slog.Info(fmt.Sprintf("OnCmd %s", cmdInFlush), logTag) + tenEnv.LogInfo(fmt.Sprintf("OnCmd %s", cmdInFlush)) switch cmdName { case cmdInFlush: @@ -271,24 +268,24 @@ func (e *minimaxTTSExtension) OnCmd( // send out outCmd, err := ten.NewCmd(cmdOutFlush) if err != nil { - slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) + tenEnv.LogError(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } if err := tenEnv.SendCmd(outCmd, nil); err != nil { - slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) + tenEnv.LogError(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } else { - slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) + tenEnv.LogInfo(fmt.Sprintf("cmd %s sent", cmdOutFlush)) } } cmdResult, _ := ten.NewCmdResult(ten.StatusCodeOk) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) } // OnData receives data from ten graph. 
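The receivedTs/outdateTs comparisons above are the interrupt mechanism: a flush command stamps outdateTs, and both the text queue and the PCM read loop drop any in-flight message older than that stamp. The same idea reduced to a Python sketch (all names here are hypothetical):

    import time

    outdate_ts = 0  # advanced by each flush command


    def on_flush() -> None:
        global outdate_ts
        outdate_ts = time.monotonic_ns()


    def stream_pcm(received_ts: int, pcm_chunks, send) -> None:
        for chunk in pcm_chunks:
            if received_ts < outdate_ts:
                break  # a later flush outdated this request; stop streaming
            send(chunk)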
@@ -302,16 +299,16 @@ func (e *minimaxTTSExtension) OnData( ) { text, err := data.GetPropertyString(dataInTextDataPropertyText) if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err)) return } if len(text) == 0 { - slog.Debug("OnData text is empty, ignored", logTag) + tenEnv.LogDebug("OnData text is empty, ignored") return } - slog.Info(fmt.Sprintf("OnData input text: [%s]", text), logTag) + tenEnv.LogInfo(fmt.Sprintf("OnData input text: [%s]", text)) go func() { textChan <- &message{text: text, receivedTs: time.Now().UnixMicro()} @@ -319,8 +316,6 @@ func (e *minimaxTTSExtension) OnData( } func init() { - slog.Info("minimax_tts extension init", logTag) - // Register addon ten.RegisterAddonAsExtension( "minimax_tts", diff --git a/agents/ten_packages/extension/minimax_tts/pcm.go b/agents/ten_packages/extension/minimax_tts/pcm.go index bb7072ef..08b95464 100644 --- a/agents/ten_packages/extension/minimax_tts/pcm.go +++ b/agents/ten_packages/extension/minimax_tts/pcm.go @@ -10,7 +10,6 @@ package extension import ( "fmt" - "log/slog" "ten_framework/ten" ) @@ -47,10 +46,10 @@ func newPcm(config *pcmConfig) *pcm { } } -func (p *pcm) getPcmFrame(buf []byte) (pcmFrame ten.AudioFrame, err error) { +func (p *pcm) getPcmFrame(tenEnv ten.TenEnv, buf []byte) (pcmFrame ten.AudioFrame, err error) { pcmFrame, err = ten.NewAudioFrame(p.config.Name) if err != nil { - slog.Error(fmt.Sprintf("NewAudioFrame failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("NewAudioFrame failed, err: %v", err)) return } @@ -66,7 +65,7 @@ func (p *pcm) getPcmFrame(buf []byte) (pcmFrame ten.AudioFrame, err error) { borrowedBuf, err := pcmFrame.LockBuf() if err != nil { - slog.Error(fmt.Sprintf("LockBuf failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("LockBuf failed, err: %v", err)) return } @@ -86,15 +85,15 @@ func (p *pcm) newBuf() []byte { } func (p *pcm) send(tenEnv ten.TenEnv, buf []byte) (err error) { - pcmFrame, err := p.getPcmFrame(buf) + pcmFrame, err := p.getPcmFrame(tenEnv, buf) if err != nil { - slog.Error(fmt.Sprintf("getPcmFrame failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("getPcmFrame failed, err: %v", err)) return } // send pcm - if err = tenEnv.SendAudioFrame(pcmFrame); err != nil { - slog.Error(fmt.Sprintf("SendAudioFrame failed, err: %v", err), logTag) + if err = tenEnv.SendAudioFrame(pcmFrame, nil); err != nil { + tenEnv.LogError(fmt.Sprintf("SendAudioFrame failed, err: %v", err)) return } diff --git a/agents/ten_packages/extension/minimax_tts_python/extension.py b/agents/ten_packages/extension/minimax_tts_python/extension.py index 052ce250..41ee8a4f 100644 --- a/agents/ten_packages/extension/minimax_tts_python/extension.py +++ b/agents/ten_packages/extension/minimax_tts_python/extension.py @@ -4,7 +4,6 @@ # See the LICENSE file for more information. 
# import traceback -from ten.data import Data from ten_ai_base.tts import AsyncTTSBaseExtension from .minimax_tts import MinimaxTTS, MinimaxTTSConfig from ten import ( @@ -25,7 +24,7 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: await super().on_start(ten_env) ten_env.log_debug("on_start") - config = MinimaxTTSConfig.create(ten_env=ten_env) + config = await MinimaxTTSConfig.create_async(ten_env=ten_env) ten_env.log_info(f"config: {config.api_key}, {config.group_id}") @@ -38,18 +37,20 @@ async def on_stop(self, ten_env: AsyncTenEnv) -> None: await super().on_stop(ten_env) ten_env.log_debug("on_stop") - # TODO: clean up resources - async def on_deinit(self, ten_env: AsyncTenEnv) -> None: await super().on_deinit(ten_env) ten_env.log_debug("on_deinit") - async def on_request_tts(self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool) -> None: + async def on_request_tts( + self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool + ) -> None: try: data = self.client.get(ten_env, input_text) async for frame in data: - self.send_audio_out(ten_env, frame, sample_rate=self.client.config.sample_rate) - except Exception as err: + await self.send_audio_out( + ten_env, frame, sample_rate=self.client.config.sample_rate + ) + except Exception: ten_env.log_error(f"on_request_tts failed: {traceback.format_exc()}") async def on_cancel_tts(self, ten_env: AsyncTenEnv) -> None: diff --git a/agents/ten_packages/extension/minimax_tts_python/manifest.json b/agents/ten_packages/extension/minimax_tts_python/manifest.json index 12913aa9..bf9c2f02 100644 --- a/agents/ten_packages/extension/minimax_tts_python/manifest.json +++ b/agents/ten_packages/extension/minimax_tts_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "minimax_tts_python", - "version": "0.4.2", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4.2" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/minimax_tts_python/minimax_tts.py b/agents/ten_packages/extension/minimax_tts_python/minimax_tts.py index 6f0184c9..d4715bd1 100644 --- a/agents/ten_packages/extension/minimax_tts_python/minimax_tts.py +++ b/agents/ten_packages/extension/minimax_tts_python/minimax_tts.py @@ -1,6 +1,5 @@ import asyncio from dataclasses import dataclass -import traceback import aiohttp import json from datetime import datetime @@ -9,6 +8,7 @@ from ten.async_ten_env import AsyncTenEnv from ten_ai_base.config import BaseConfig + @dataclass class MinimaxTTSConfig(BaseConfig): api_key: str = "" @@ -24,33 +24,32 @@ class MinimaxTTS: def __init__(self, config: MinimaxTTSConfig): self.config = config - async def get(self, ten_env: AsyncTenEnv, text: str) -> AsyncIterator[bytes]: - payload = json.dumps({ - "model": self.config.model, - "text": text, - "stream": True, - "voice_setting": { - "voice_id": self.config.voice_id, - "speed": 1.0, - "vol": 1.0, - "pitch": 0 - }, - "pronunciation_dict": { - "tone": [] - }, - "audio_setting": { - "sample_rate": self.config.sample_rate, - "format": "pcm", - "channel": 1 + payload = json.dumps( + { + "model": self.config.model, + "text": text, + "stream": True, + "voice_setting": { + "voice_id": self.config.voice_id, + "speed": 1.0, + "vol": 1.0, + "pitch": 0, + }, + "pronunciation_dict": {"tone": []}, + "audio_setting": { + "sample_rate": self.config.sample_rate, + "format": "pcm", + "channel": 1, + }, } - }) + ) url = f"{self.config.url}?GroupId={self.config.group_id}" headers = { "accept": "application/json, 
text/plain, */*", "Authorization": f"Bearer {self.config.api_key}", - "Content-Type": "application/json" + "Content-Type": "application/json", } start_time = datetime.now() @@ -65,33 +64,44 @@ async def get(self, ten_env: AsyncTenEnv, text: str) -> AsyncIterator[bytes]: try: trace_id = response.headers.get("Trace-Id") - except: + except Exception: ten_env.log_warn("get response, no Trace-Id") try: alb_receive_time = response.headers.get("alb_receive_time") - except: + except Exception: ten_env.log_warn("get response, no alb_receive_time") - ten_env.log_info(f"get response trace-id: {trace_id}, alb_receive_time: {alb_receive_time}, cost_time {self._duration_in_ms_since(start_time)}ms") + ten_env.log_info( + f"get response trace-id: {trace_id}, alb_receive_time: {alb_receive_time}, cost_time {self._duration_in_ms_since(start_time)}ms" + ) if response.status != 200: - raise Exception(f"Request failed with status {response.status}") + raise RuntimeError( + f"Request failed with status {response.status}" + ) buffer = b"" - async for chunk in response.content.iter_chunked(1024): # Read in 1024 byte chunks + async for chunk in response.content.iter_chunked( + 1024 + ): # Read in 1024 byte chunks buffer += chunk # Split the buffer into lines based on newline character - while b'\n' in buffer: - line, buffer = buffer.split(b'\n', 1) - + while b"\n" in buffer: + line, buffer = buffer.split(b"\n", 1) + # Process only lines that start with "data:" - if line.startswith(b'data:'): + if line.startswith(b"data:"): try: - json_data = json.loads(line[5:].decode('utf-8').strip()) - + json_data = json.loads( + line[5:].decode("utf-8").strip() + ) + # Check for the required keys in the JSON data - if "data" in json_data and "extra_info" not in json_data: + if ( + "data" in json_data + and "extra_info" not in json_data + ): audio = json_data["data"].get("audio") if audio: decoded_hex = bytes.fromhex(audio) @@ -108,10 +118,12 @@ async def get(self, ten_env: AsyncTenEnv, text: str) -> AsyncIterator[bytes]: except asyncio.TimeoutError: ten_env.log_error("Request timed out") finally: - ten_env.log_info(f"http loop done, cost_time {self._duration_in_ms_since(start_time)}ms") + ten_env.log_info( + f"http loop done, cost_time {self._duration_in_ms_since(start_time)}ms" + ) def _duration_in_ms(self, start: datetime, end: datetime) -> int: return int((end - start).total_seconds() * 1000) - + def _duration_in_ms_since(self, start: datetime) -> int: return self._duration_in_ms(start, datetime.now()) diff --git a/agents/ten_packages/extension/minimax_v2v_python/extension.py b/agents/ten_packages/extension/minimax_v2v_python/extension.py index 64143b69..6612b0af 100644 --- a/agents/ten_packages/extension/minimax_v2v_python/extension.py +++ b/agents/ten_packages/extension/minimax_v2v_python/extension.py @@ -22,7 +22,7 @@ from datetime import datetime import aiofiles import asyncio -from typing import Iterator, List, Dict, Tuple, Any +from typing import List, Dict, Tuple, Any import base64 import json @@ -43,24 +43,24 @@ class MinimaxV2VConfig: max_memory_length: int = 10 dump: bool = False - def read_from_property(self, ten_env: AsyncTenEnv): + async def read_from_property(self, ten_env: AsyncTenEnv): for field in fields(self): - # TODO: 'is_property_exist' has a bug that can not be used in async extension currently, use it instead of try .. except once fixed + # 'is_property_exist' has a bug that can not be used in async extension currently, use it instead of try .. 
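The streaming loop above does lightweight server-sent-events parsing by hand: buffer raw chunks, split on newlines, keep only data: lines, skip the trailing usage record (extra_info), and hex-decode the audio field. The same logic isolated as a sketch (a hypothetical helper, under the same assumptions as the code above):

    import json


    def extract_pcm(lines: list[bytes]) -> bytes:
        # Hypothetical distillation of the "data:" handling in MinimaxTTS.get().
        pcm = b""
        for line in lines:
            if not line.startswith(b"data:"):
                continue
            event = json.loads(line[5:].decode("utf-8").strip())
            if "data" in event and "extra_info" not in event:
                audio_hex = event["data"].get("audio")
                if audio_hex:
                    pcm += bytes.fromhex(audio_hex)
        return pcm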
except once fixed # if not ten_env.is_property_exist(field.name): # continue try: match field.type: case builtins.str: - val = ten_env.get_property_string(field.name) + val = await ten_env.get_property_string(field.name) if val: setattr(self, field.name, val) ten_env.log_info(f"{field.name}={val}") case builtins.int: - val = ten_env.get_property_int(field.name) + val = await ten_env.get_property_int(field.name) setattr(self, field.name, val) ten_env.log_info(f"{field.name}={val}") case builtins.bool: - val = ten_env.get_property_bool(field.name) + val = await ten_env.get_property_bool(field.name) setattr(self, field.name, val) ten_env.log_info(f"{field.name}={val}") case _: @@ -80,24 +80,23 @@ def __init__(self, name: str) -> None: self.ten_env = None # able to cancel - self.curr_task = None + self.curr_task = None # make sure tasks processing in order self.process_input_task = None self.queue = asyncio.Queue() async def on_init(self, ten_env: AsyncTenEnv) -> None: - self.config.read_from_property(ten_env=ten_env) + await self.config.read_from_property(ten_env=ten_env) ten_env.log_info(f"config: {self.config}") self.memory = ChatMemory(self.config.max_memory_length) self.ten_env = ten_env - ten_env.on_init_done() async def on_start(self, ten_env: AsyncTenEnv) -> None: - self.process_input_task = asyncio.create_task(self._process_input(ten_env=ten_env, queue=self.queue), name="process_input") - - ten_env.on_start_done() + self.process_input_task = asyncio.create_task( + self._process_input(ten_env=ten_env, queue=self.queue), name="process_input" + ) async def on_stop(self, ten_env: AsyncTenEnv) -> None: @@ -108,8 +107,6 @@ async def on_stop(self, ten_env: AsyncTenEnv) -> None: await asyncio.gather(self.process_input_task, return_exceptions=True) self.process_input_task = None - ten_env.on_stop_done() - async def on_deinit(self, ten_env: AsyncTenEnv) -> None: ten_env.log_debug("on_deinit") @@ -117,7 +114,6 @@ async def on_deinit(self, ten_env: AsyncTenEnv) -> None: await self.client.aclose() self.client = None self.ten_env = None - ten_env.on_deinit_done() async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: try: @@ -132,10 +128,10 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: ten_env.log_debug("flush done") case _: pass - ten_env.return_result(CmdResult.create(StatusCode.OK), cmd) + await ten_env.return_result(CmdResult.create(StatusCode.OK), cmd) except asyncio.CancelledError: ten_env.log_warn(f"cmd {cmd_name} cancelled") - ten_env.return_result(CmdResult.create(StatusCode.ERROR), cmd) + await ten_env.return_result(CmdResult.create(StatusCode.ERROR), cmd) raise except Exception as e: ten_env.log_warn(f"cmd {cmd_name} failed, err {e}") @@ -165,10 +161,10 @@ async def on_audio_frame( # dump input audio if need await self._dump_audio_if_need(frame_buf, "in") - + # ten_env.log_debug(f"on audio frame {len(frame_buf)} {stream_id} put done") except asyncio.CancelledError: - ten_env.log_warn(f"on audio frame cancelled") + ten_env.log_warn("on audio frame cancelled") raise except Exception as e: ten_env.log_error(f"on audio frame failed, err {e}") @@ -190,7 +186,9 @@ async def _process_input(self, ten_env: AsyncTenEnv, queue: asyncio.Queue): ten_env.log_debug(f"start process task {ts} {len(frame_buf)}") try: - self.curr_task = asyncio.create_task(self._complete_with_history(ts, frame_buf)) + self.curr_task = asyncio.create_task( + self._complete_with_history(ts, frame_buf) + ) await self.curr_task self.curr_task = None except asyncio.CancelledError: @@ -202,9 +200,7 
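read_from_property above walks the dataclass fields and dispatches on each field's annotated type, so adding a config knob only means adding a field; with this patch the getters are awaited because they are coroutines on AsyncTenEnv. A trimmed sketch of the pattern (the config class is hypothetical; the getter names come from the diff):

    import builtins
    from dataclasses import dataclass, fields


    @dataclass
    class Config:
        token: str = ""
        sample_rate: int = 16000
        dump: bool = False

        async def read_from_property(self, ten_env) -> None:
            for field in fields(self):
                # field.type is the annotation object itself when annotations
                # are not stringified, which is what the code above relies on.
                match field.type:
                    case builtins.str:
                        setattr(self, field.name, await ten_env.get_property_string(field.name))
                    case builtins.int:
                        setattr(self, field.name, await ten_env.get_property_int(field.name))
                    case builtins.bool:
                        setattr(self, field.name, await ten_env.get_property_bool(field.name))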
@@ async def _process_input(self, ten_env: AsyncTenEnv, queue: asyncio.Queue): ten_env.log_info("process_input exit") - async def _complete_with_history( - self, ts: datetime, buff: bytearray - ): + async def _complete_with_history(self, ts: datetime, buff: bytearray): start_time = datetime.now() ten_env = self.ten_env ten_env.log_debug( @@ -249,7 +245,7 @@ async def _complete_with_history( i = 0 async for line in response.aiter_lines(): - # logger.info(f"-> line {line}") + # ten_env.log_info(f"-> line {line}") # if self._need_interrupt(ts): # ten_env.log_warn(f"trace-id: {trace_id}, interrupted") # if self.transcript: @@ -311,7 +307,9 @@ async def _complete_with_history( base64_str = delta["audio_content"] buff = base64.b64decode(base64_str) await self._dump_audio_if_need(buff, "out") - self._send_audio_frame(ten_env=ten_env, audio_data=buff) + await self._send_audio_frame( + ten_env=ten_env, audio_data=buff + ) # tool calls if delta.get("tool_calls"): @@ -398,7 +396,6 @@ def _create_request( payload = { "model": config.model, "messages": messages, - "tools": [], "tool_choice": "none", "stream": True, "stream_options": {"speech_output": True}, # 开启语音输出 @@ -420,7 +417,9 @@ def _create_request( return (headers, payload) - def _send_audio_frame(self, ten_env: AsyncTenEnv, audio_data: bytearray) -> None: + async def _send_audio_frame( + self, ten_env: AsyncTenEnv, audio_data: bytearray + ) -> None: try: f = AudioFrame.create("pcm_frame") f.set_sample_rate(self.config.out_sample_rate) @@ -432,7 +431,7 @@ def _send_audio_frame(self, ten_env: AsyncTenEnv, audio_data: bytearray) -> None buff = f.lock_buf() buff[:] = audio_data f.unlock_buf(buff) - ten_env.send_audio_frame(f) + await ten_env.send_audio_frame(f) except Exception as e: ten_env.log_error(f"send audio frame failed, err {e}") @@ -455,7 +454,7 @@ def _send_transcript( ten_env.log_info( f"send transcript text [{content}] {stream_id} end_of_segment {end_of_segment} role {role}" ) - self.ten_env.send_data(d) + asyncio.create_task(self.ten_env.send_data(d)) except Exception as e: ten_env.log_warn( f"send transcript text [{content}] {stream_id} end_of_segment {end_of_segment} role {role} failed, err {e}" @@ -468,7 +467,7 @@ async def _flush(self, ten_env: AsyncTenEnv) -> None: self.queue.get_nowait() self.queue.task_done() except Exception as e: - ten_env.log_warn("flush queue error {e}") + ten_env.log_warn(f"flush queue error {e}") # cancel current task if self.curr_task: diff --git a/agents/ten_packages/extension/minimax_v2v_python/manifest.json b/agents/ten_packages/extension/minimax_v2v_python/manifest.json index 6680d45d..d31df6c7 100644 --- a/agents/ten_packages/extension/minimax_v2v_python/manifest.json +++ b/agents/ten_packages/extension/minimax_v2v_python/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/openai_chatgpt/manifest.json b/agents/ten_packages/extension/openai_chatgpt/manifest.json index 4628343f..4e678221 100644 --- a/agents/ten_packages/extension/openai_chatgpt/manifest.json +++ b/agents/ten_packages/extension/openai_chatgpt/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "openai_chatgpt", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_go", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/openai_chatgpt/openai_chatgpt_extension.go 
b/agents/ten_packages/extension/openai_chatgpt/openai_chatgpt_extension.go index 2a1cd294..97ac2b9f 100644 --- a/agents/ten_packages/extension/openai_chatgpt/openai_chatgpt_extension.go +++ b/agents/ten_packages/extension/openai_chatgpt/openai_chatgpt_extension.go @@ -14,7 +14,6 @@ import ( "errors" "fmt" "io" - "log/slog" "sync" "sync/atomic" "time" @@ -24,10 +23,6 @@ import ( openai "github.com/sashabaranov/go-openai" ) -var ( - logTag = slog.String("extension", "OPENAI_CHATGPT_EXTENSION") -) - type openaiChatGPTExtension struct { ten.DefaultExtension openaiChatGPT *openaiChatGPT @@ -82,13 +77,13 @@ func newChatGPTExtension(name string) ten.Extension { // - greeting // - proxy_url func (p *openaiChatGPTExtension) OnStart(tenEnv ten.TenEnv) { - slog.Info("OnStart", logTag) + tenEnv.LogInfo("OnStart") // prepare configuration openaiChatGPTConfig := defaultOpenaiChatGPTConfig() if baseUrl, err := tenEnv.GetPropertyString(propertyBaseUrl); err != nil { - slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyBaseUrl, err), logTag) + tenEnv.LogError(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyBaseUrl, err)) } else { if len(baseUrl) > 0 { openaiChatGPTConfig.BaseUrl = baseUrl @@ -96,14 +91,14 @@ func (p *openaiChatGPTExtension) OnStart(tenEnv ten.TenEnv) { } if apiKey, err := tenEnv.GetPropertyString(propertyApiKey); err != nil { - slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) + tenEnv.LogError(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err)) return } else { openaiChatGPTConfig.ApiKey = apiKey } if model, err := tenEnv.GetPropertyString(propertyModel); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyModel, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s error:%v", propertyModel, err)) } else { if len(model) > 0 { openaiChatGPTConfig.Model = model @@ -111,7 +106,7 @@ func (p *openaiChatGPTExtension) OnStart(tenEnv ten.TenEnv) { } if prompt, err := tenEnv.GetPropertyString(propertyPrompt); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyPrompt, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s error:%v", propertyPrompt, err)) } else { if len(prompt) > 0 { openaiChatGPTConfig.Prompt = prompt @@ -119,31 +114,31 @@ func (p *openaiChatGPTExtension) OnStart(tenEnv ten.TenEnv) { } if frequencyPenalty, err := tenEnv.GetPropertyFloat64(propertyFrequencyPenalty); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyFrequencyPenalty, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyFrequencyPenalty, err)) } else { openaiChatGPTConfig.FrequencyPenalty = float32(frequencyPenalty) } if presencePenalty, err := tenEnv.GetPropertyFloat64(propertyPresencePenalty); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyPresencePenalty, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyPresencePenalty, err)) } else { openaiChatGPTConfig.PresencePenalty = float32(presencePenalty) } if temperature, err := tenEnv.GetPropertyFloat64(propertyTemperature); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTemperature, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTemperature, err)) } else { openaiChatGPTConfig.Temperature = float32(temperature) } if topP, err := 
tenEnv.GetPropertyFloat64(propertyTopP); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTopP, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTopP, err)) } else { openaiChatGPTConfig.TopP = float32(topP) } if maxTokens, err := tenEnv.GetPropertyInt64(propertyMaxTokens); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxTokens, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxTokens, err)) } else { if maxTokens > 0 { openaiChatGPTConfig.MaxTokens = int(maxTokens) @@ -151,18 +146,18 @@ func (p *openaiChatGPTExtension) OnStart(tenEnv ten.TenEnv) { } if proxyUrl, err := tenEnv.GetPropertyString(propertyProxyUrl); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyProxyUrl, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyProxyUrl, err)) } else { openaiChatGPTConfig.ProxyUrl = proxyUrl } greeting, err := tenEnv.GetPropertyString(propertyGreeting) if err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyGreeting, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyGreeting, err)) } if propMaxMemoryLength, err := tenEnv.GetPropertyInt64(propertyMaxMemoryLength); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxMemoryLength, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxMemoryLength, err)) } else { if propMaxMemoryLength > 0 { maxMemoryLength = int(propMaxMemoryLength) @@ -172,11 +167,11 @@ func (p *openaiChatGPTExtension) OnStart(tenEnv ten.TenEnv) { // create openaiChatGPT instance openaiChatgpt, err := newOpenaiChatGPT(openaiChatGPTConfig) if err != nil { - slog.Error(fmt.Sprintf("newOpenaiChatGPT failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("newOpenaiChatGPT failed, err: %v", err)) return } - slog.Info(fmt.Sprintf("newOpenaiChatGPT succeed with max_tokens: %d, model: %s", - openaiChatGPTConfig.MaxTokens, openaiChatGPTConfig.Model), logTag) + tenEnv.LogInfo(fmt.Sprintf("newOpenaiChatGPT succeed with max_tokens: %d, model: %s", + openaiChatGPTConfig.MaxTokens, openaiChatGPTConfig.Model)) p.openaiChatGPT = openaiChatgpt @@ -187,10 +182,10 @@ func (p *openaiChatGPTExtension) OnStart(tenEnv ten.TenEnv) { outputData, _ := ten.NewData("text_data") outputData.SetProperty(dataOutTextDataPropertyText, greeting) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) - if err := tenEnv.SendData(outputData); err != nil { - slog.Error(fmt.Sprintf("greeting [%s] send failed, err: %v", greeting, err), logTag) + if err := tenEnv.SendData(outputData, nil); err != nil { + tenEnv.LogError(fmt.Sprintf("greeting [%s] send failed, err: %v", greeting, err)) } else { - slog.Info(fmt.Sprintf("greeting [%s] sent", greeting), logTag) + tenEnv.LogInfo(fmt.Sprintf("greeting [%s] sent", greeting)) } } @@ -208,12 +203,12 @@ func (p *openaiChatGPTExtension) OnCmd( ) { cmdName, err := cmd.GetName() if err != nil { - slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("OnCmd get name failed, err: %v", err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } - slog.Info(fmt.Sprintf("OnCmd %s", cmdInFlush), logTag) + 
tenEnv.LogInfo(fmt.Sprintf("OnCmd %s", cmdInFlush)) switch cmdName { case cmdInFlush: @@ -224,22 +219,22 @@ func (p *openaiChatGPTExtension) OnCmd( // send out outCmd, err := ten.NewCmd(cmdOutFlush) if err != nil { - slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) + tenEnv.LogError(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } if err := tenEnv.SendCmd(outCmd, nil); err != nil { - slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) + tenEnv.LogError(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err)) cmdResult, _ := ten.NewCmdResult(ten.StatusCodeError) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) return } else { - slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) + tenEnv.LogInfo(fmt.Sprintf("cmd %s sent", cmdOutFlush)) } } cmdResult, _ := ten.NewCmdResult(ten.StatusCodeOk) - tenEnv.ReturnResult(cmdResult, cmd) + tenEnv.ReturnResult(cmdResult, cmd, nil) } // OnData receives data from ten graph. @@ -254,25 +249,25 @@ func (p *openaiChatGPTExtension) OnData( // Get isFinal isFinal, err := data.GetPropertyBool(dataInTextDataPropertyIsFinal) if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyIsFinal, err), logTag) + tenEnv.LogWarn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyIsFinal, err)) return } if !isFinal { // ignore non-final - slog.Debug("ignore non-final input", logTag) + tenEnv.LogDebug("ignore non-final input") return } // Get input text inputText, err := data.GetPropertyString(dataInTextDataPropertyText) if err != nil { - slog.Error(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag) + tenEnv.LogError(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err)) return } if len(inputText) == 0 { - slog.Debug("ignore empty text", logTag) + tenEnv.LogDebug("ignore empty text") return } - slog.Info(fmt.Sprintf("OnData input text: [%s]", inputText), logTag) + tenEnv.LogInfo(fmt.Sprintf("OnData input text: [%s]", inputText)) // prepare memory for len(memoryChan) > 0 { @@ -297,12 +292,12 @@ func (p *openaiChatGPTExtension) OnData( wg.Add(1) go func(startTime time.Time, inputText string, memory []openai.ChatCompletionMessage) { defer wg.Done() - slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] memory: %v", inputText, memory), logTag) + tenEnv.LogInfo(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] memory: %v", inputText, memory)) // Get result from ai resp, err := p.openaiChatGPT.getChatCompletionsStream(memory) if err != nil { - slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] failed, err: %v", inputText, err), logTag) + tenEnv.LogError(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] failed, err: %v", inputText, err)) return } defer func() { @@ -310,20 +305,20 @@ func (p *openaiChatGPTExtension) OnData( resp.Close() } }() - slog.Debug(fmt.Sprintf("GetChatCompletionsStream start to recv for input text: [%s]", inputText), logTag) + tenEnv.LogDebug(fmt.Sprintf("GetChatCompletionsStream start to recv for input text: [%s]", inputText)) var sentence, fullContent string var firstSentenceSent bool for { if startTime.UnixMicro() < outdateTs.Load() { // Check whether to interrupt - 
slog.Info(fmt.Sprintf("GetChatCompletionsStream recv interrupt and flushing for input text: [%s], startTs: %d, outdateTs: %d", - inputText, startTime.UnixMicro(), outdateTs.Load()), logTag) + tenEnv.LogInfo(fmt.Sprintf("GetChatCompletionsStream recv interrupt and flushing for input text: [%s], startTs: %d, outdateTs: %d", + inputText, startTime.UnixMicro(), outdateTs.Load())) break } chatCompletions, err := resp.Recv() if errors.Is(err, io.EOF) { - slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s], io.EOF break", inputText), logTag) + tenEnv.LogDebug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s], io.EOF break", inputText)) break } @@ -338,31 +333,31 @@ func (p *openaiChatGPTExtension) OnData( var sentenceIsFinal bool sentence, content, sentenceIsFinal = parseSentence(sentence, content) if len(sentence) == 0 || !sentenceIsFinal { - slog.Debug(fmt.Sprintf("sentence %s is empty or not final", sentence), logTag) + tenEnv.LogDebug(fmt.Sprintf("sentence %s is empty or not final", sentence)) break } - slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] got sentence: [%s]", inputText, sentence), logTag) + tenEnv.LogDebug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] got sentence: [%s]", inputText, sentence)) // send sentence outputData, err := ten.NewData("text_data") if err != nil { - slog.Error(fmt.Sprintf("NewData failed, err: %v", err), logTag) + tenEnv.LogError(fmt.Sprintf("NewData failed, err: %v", err)) break } outputData.SetProperty(dataOutTextDataPropertyText, sentence) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, false) - if err := tenEnv.SendData(outputData); err != nil { - slog.Error(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] send sentence [%s] failed, err: %v", inputText, sentence, err), logTag) + if err := tenEnv.SendData(outputData, nil); err != nil { + tenEnv.LogError(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] send sentence [%s] failed, err: %v", inputText, sentence, err)) break } else { - slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] sent sentence [%s]", inputText, sentence), logTag) + tenEnv.LogInfo(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] sent sentence [%s]", inputText, sentence)) } sentence = "" if !firstSentenceSent { firstSentenceSent = true - slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] first sentence sent, first_sentency_latency %dms", - inputText, time.Since(startTime).Milliseconds()), logTag) + tenEnv.LogInfo(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] first sentence sent, first_sentency_latency %dms", + inputText, time.Since(startTime).Milliseconds())) } } } @@ -377,17 +372,15 @@ func (p *openaiChatGPTExtension) OnData( outputData, _ := ten.NewData("text_data") outputData.SetProperty(dataOutTextDataPropertyText, sentence) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) - if err := tenEnv.SendData(outputData); err != nil { - slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] send failed, err: %v", inputText, sentence, err), logTag) + if err := tenEnv.SendData(outputData, nil); err != nil { + tenEnv.LogError(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] send failed, err: %v", inputText, sentence, err)) } else { - slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence 
[%s] sent", inputText, sentence), logTag) + tenEnv.LogInfo(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] sent", inputText, sentence)) } }(time.Now(), inputText, append([]openai.ChatCompletionMessage{}, memory...)) } func init() { - slog.Info("init") - // Register addon ten.RegisterAddonAsExtension( "openai_chatgpt", diff --git a/agents/ten_packages/extension/openai_chatgpt_python/BUILD.gn b/agents/ten_packages/extension/openai_chatgpt_python/BUILD.gn index 23f06108..0b03f3ef 100644 --- a/agents/ten_packages/extension/openai_chatgpt_python/BUILD.gn +++ b/agents/ten_packages/extension/openai_chatgpt_python/BUILD.gn @@ -14,7 +14,6 @@ ten_package("openai_chatgpt_python") { "__init__.py", "addon.py", "extension.py", - "log.py", "manifest.json", "property.json", ] diff --git a/agents/ten_packages/extension/openai_chatgpt_python/__init__.py b/agents/ten_packages/extension/openai_chatgpt_python/__init__.py index 09a409ff..8cd75dde 100644 --- a/agents/ten_packages/extension/openai_chatgpt_python/__init__.py +++ b/agents/ten_packages/extension/openai_chatgpt_python/__init__.py @@ -6,6 +6,3 @@ # # from . import addon -from .log import logger - -logger.info("openai_chatgpt_python extension loaded") diff --git a/agents/ten_packages/extension/openai_chatgpt_python/extension.py b/agents/ten_packages/extension/openai_chatgpt_python/extension.py index 81e751f1..4fb6fd96 100644 --- a/agents/ten_packages/extension/openai_chatgpt_python/extension.py +++ b/agents/ten_packages/extension/openai_chatgpt_python/extension.py @@ -11,11 +11,22 @@ from typing import Iterable from ten.async_ten_env import AsyncTenEnv -from ten.ten_env import TenEnv from ten_ai_base.const import CMD_PROPERTY_RESULT, CMD_TOOL_CALL -from ten_ai_base.helper import AsyncEventEmitter, get_properties_int, get_properties_string, get_properties_float, get_property_bool, get_property_int, get_property_string -from ten_ai_base.llm import AsyncLLMBaseExtension -from ten_ai_base.types import LLMCallCompletionArgs, LLMChatCompletionContentPartParam, LLMChatCompletionUserMessageParam, LLMChatCompletionMessageParam, LLMDataCompletionArgs, LLMToolMetadata, LLMToolResult +from ten_ai_base.helper import ( + AsyncEventEmitter, + get_property_bool, + get_property_string, +) +from ten_ai_base import AsyncLLMBaseExtension +from ten_ai_base.types import ( + LLMCallCompletionArgs, + LLMChatCompletionContentPartParam, + LLMChatCompletionUserMessageParam, + LLMChatCompletionMessageParam, + LLMDataCompletionArgs, + LLMToolMetadata, + LLMToolResult, +) from .helper import parse_sentences from .openai import OpenAIChatGPT, OpenAIChatGPTConfig @@ -44,113 +55,118 @@ def __init__(self, name: str): self.config = None self.client = None self.sentence_fragment = "" - self.toolcall_future = None + self.tool_task_future = None self.users_count = 0 - async def on_init(self, ten_env: AsyncTenEnv) -> None: - ten_env.log_info("on_init") - await super().on_init(ten_env) - - async def on_start(self, ten_env: AsyncTenEnv) -> None: - ten_env.log_info("on_start") - await super().on_start(ten_env) + async def on_init(self, async_ten_env: AsyncTenEnv) -> None: + async_ten_env.log_info("on_init") + await super().on_init(async_ten_env) - self.config = OpenAIChatGPTConfig.create(ten_env=ten_env) + async def on_start(self, async_ten_env: AsyncTenEnv) -> None: + async_ten_env.log_info("on_start") + await super().on_start(async_ten_env) + self.config = await OpenAIChatGPTConfig.create_async(ten_env=async_ten_env) # Mandatory properties if not 
self.config.api_key: - ten_env.log_info(f"API key is missing, exiting on_start") + async_ten_env.log_info("API key is missing, exiting on_start") return # Create instance try: - self.client = OpenAIChatGPT(ten_env, self.config) - ten_env.log_info( - f"initialized with max_tokens: {self.config.max_tokens}, model: {self.config.model}, vendor: {self.config.vendor}") + self.client = OpenAIChatGPT(async_ten_env, self.config) + async_ten_env.log_info( + f"initialized with max_tokens: {self.config.max_tokens}, model: {self.config.model}, vendor: {self.config.vendor}" + ) except Exception as err: - ten_env.log_info(f"Failed to initialize OpenAIChatGPT: {err}") + async_ten_env.log_info(f"Failed to initialize OpenAIChatGPT: {err}") - async def on_stop(self, ten_env: AsyncTenEnv) -> None: - ten_env.log_info("on_stop") - await super().on_stop(ten_env) + async def on_stop(self, async_ten_env: AsyncTenEnv) -> None: + async_ten_env.log_info("on_stop") + await super().on_stop(async_ten_env) - async def on_deinit(self, ten_env: AsyncTenEnv) -> None: - ten_env.log_info("on_deinit") - await super().on_deinit(ten_env) + async def on_deinit(self, async_ten_env: AsyncTenEnv) -> None: + async_ten_env.log_info("on_deinit") + await super().on_deinit(async_ten_env) - async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None: + async def on_cmd(self, async_ten_env: AsyncTenEnv, cmd: Cmd) -> None: cmd_name = cmd.get_name() - ten_env.log_info(f"on_cmd name: {cmd_name}") + async_ten_env.log_info(f"on_cmd name: {cmd_name}") if cmd_name == CMD_IN_FLUSH: - await self.flush_input_items(ten_env) - await ten_env.send_cmd(Cmd.create(CMD_OUT_FLUSH)) - ten_env.log_info("on_cmd sent flush") + await self.flush_input_items(async_ten_env) + await async_ten_env.send_cmd(Cmd.create(CMD_OUT_FLUSH)) + async_ten_env.log_info("on_cmd sent flush") status_code, detail = StatusCode.OK, "success" cmd_result = CmdResult.create(status_code) cmd_result.set_property_string("detail", detail) - ten_env.return_result(cmd_result, cmd) + await async_ten_env.return_result(cmd_result, cmd) elif cmd_name == CMD_IN_ON_USER_JOINED: self.users_count += 1 # Send greeting when first user joined if self.config.greeting and self.users_count == 1: - self.send_text_output(ten_env, self.config.greeting, True) + self.send_text_output(async_ten_env, self.config.greeting, True) status_code, detail = StatusCode.OK, "success" cmd_result = CmdResult.create(status_code) cmd_result.set_property_string("detail", detail) - ten_env.return_result(cmd_result, cmd) + await async_ten_env.return_result(cmd_result, cmd) elif cmd_name == CMD_IN_ON_USER_LEFT: self.users_count -= 1 status_code, detail = StatusCode.OK, "success" cmd_result = CmdResult.create(status_code) cmd_result.set_property_string("detail", detail) - ten_env.return_result(cmd_result, cmd) + await async_ten_env.return_result(cmd_result, cmd) else: - await super().on_cmd(ten_env, cmd) + await super().on_cmd(async_ten_env, cmd) - async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: + async def on_data(self, async_ten_env: AsyncTenEnv, data: Data) -> None: data_name = data.get_name() - ten_env.log_debug("on_data name {}".format(data_name)) + async_ten_env.log_debug("on_data name {}".format(data_name)) # Get the necessary properties is_final = get_property_bool(data, "is_final") input_text = get_property_string(data, "text") if not is_final: - ten_env.log_debug("ignore non-final input") + async_ten_env.log_debug("ignore non-final input") return if not input_text: - ten_env.log_warn("ignore empty 
text") + async_ten_env.log_warn("ignore empty text") return - ten_env.log_info(f"OnData input text: [{input_text}]") + async_ten_env.log_info(f"OnData input text: [{input_text}]") # Start an asynchronous task for handling chat completion - message = LLMChatCompletionUserMessageParam( - role="user", content=input_text) + message = LLMChatCompletionUserMessageParam(role="user", content=input_text) await self.queue_input_item(False, messages=[message]) - async def on_tools_update(self, ten_env: TenEnv, tool: LLMToolMetadata) -> None: - return await super().on_tools_update(ten_env, tool) + async def on_tools_update( + self, async_ten_env: AsyncTenEnv, tool: LLMToolMetadata + ) -> None: + return await super().on_tools_update(async_ten_env, tool) - async def on_call_chat_completion(self, ten_env: TenEnv, **kargs: LLMCallCompletionArgs) -> any: - kmessages: LLMChatCompletionUserMessageParam = kargs.get( - "messages", []) + async def on_call_chat_completion( + self, async_ten_env: AsyncTenEnv, **kargs: LLMCallCompletionArgs + ) -> any: + kmessages: LLMChatCompletionUserMessageParam = kargs.get("messages", []) - ten_env.log_info(f"on_call_chat_completion: {kmessages}") - response = await self.client.get_chat_completions( - kmessages, None) + async_ten_env.log_info(f"on_call_chat_completion: {kmessages}") + response = await self.client.get_chat_completions(kmessages, None) return response.to_json() - async def on_data_chat_completion(self, ten_env: TenEnv, **kargs: LLMDataCompletionArgs) -> None: + async def on_data_chat_completion( + self, async_ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs + ) -> None: """Run the chatflow asynchronously.""" - kmessage: LLMChatCompletionUserMessageParam = kargs.get("messages", [])[ - 0] + kmessages: Iterable[LLMChatCompletionUserMessageParam] = kargs.get( + "messages", [] + ) + kmessage = next(iter(kmessages), None) if not kmessage: - ten_env.log_error("No message in data") + async_ten_env.log_error("No message in data") return message = self.message_to_dict(kmessage) @@ -158,28 +174,39 @@ async def on_data_chat_completion(self, ten_env: TenEnv, **kargs: LLMDataComplet self.memory_cache = [] memory = self.memory try: - ten_env.log_info(f"for input text: [{message}] memory: {memory}") + async_ten_env.log_info(f"for input text: [{message}] memory: {memory}") tools = None no_tool = kargs.get("no_tool", False) - if not isinstance(message.get("content"), str) and message.get("role") == "user": - non_artifact_content = [item for item in message.get( - "content", []) if item.get("type") == "text"] - non_artifact_message = {"role": message.get( - "role"), "content": non_artifact_content} - self.memory_cache = self.memory_cache + \ - [non_artifact_message, { - "role": "assistant", "content": ""}] + if ( + not isinstance(message.get("content"), str) + and message.get("role") == "user" + ): + non_artifact_content = [ + item + for item in message.get("content", []) + if item.get("type") == "text" + ] + non_artifact_message = { + "role": message.get("role"), + "content": non_artifact_content, + } + self.memory_cache = self.memory_cache + [ + non_artifact_message, + {"role": "assistant", "content": ""}, + ] else: - self.memory_cache = self.memory_cache + \ - [message, {"role": "assistant", "content": ""}] + self.memory_cache = self.memory_cache + [ + message, + {"role": "assistant", "content": ""}, + ] tools = None if not no_tool and len(self.available_tools) > 0: tools = [] for tool in self.available_tools: tools.append(self._convert_tools_to_dict(tool)) - 
ten_env.log_info(f"tool: {tool}") + async_ten_env.log_info(f"tool: {tool}") self.sentence_fragment = "" @@ -191,49 +218,57 @@ async def on_data_chat_completion(self, ten_env: TenEnv, **kargs: LLMDataComplet # Create an async listener to handle tool calls and content updates async def handle_tool_call(tool_call): self.tool_task_future = asyncio.get_event_loop().create_future() - ten_env.log_info(f"tool_call: {tool_call}") + async_ten_env.log_info(f"tool_call: {tool_call}") for tool in self.available_tools: if tool_call["function"]["name"] == tool.name: cmd: Cmd = Cmd.create(CMD_TOOL_CALL) cmd.set_property_string("name", tool.name) cmd.set_property_from_json( - "arguments", tool_call["function"]["arguments"]) + "arguments", tool_call["function"]["arguments"] + ) # cmd.set_property_from_json("arguments", json.dumps([])) # Send the command and handle the result through the future - result: CmdResult = await ten_env.send_cmd(cmd) + result: CmdResult = await async_ten_env.send_cmd(cmd) if result.get_status_code() == StatusCode.OK: tool_result: LLMToolResult = json.loads( - result.get_property_to_json(CMD_PROPERTY_RESULT)) + result.get_property_to_json(CMD_PROPERTY_RESULT) + ) - ten_env.log_info(f"tool_result: {tool_result}") + async_ten_env.log_info(f"tool_result: {tool_result}") # self.memory_cache = [] self.memory_cache.pop() result_content = tool_result["content"] nonlocal message new_message = { "role": "user", - "content": self._convert_to_content_parts(message["content"]) + "content": self._convert_to_content_parts( + message["content"] + ), } - new_message["content"] = new_message["content"] + \ - self._convert_to_content_parts(result_content) - await self.queue_input_item(True, messages=[new_message], no_tool=True) + new_message["content"] = new_message[ + "content" + ] + self._convert_to_content_parts(result_content) + await self.queue_input_item( + True, messages=[new_message], no_tool=True + ) else: - ten_env.log_error(f"Tool call failed") + async_ten_env.log_error("Tool call failed") self.tool_task_future.set_result(None) async def handle_content_update(content: str): # Append the content to the last assistant message for item in reversed(self.memory_cache): - if item.get('role') == 'assistant': - item['content'] = item['content'] + content + if item.get("role") == "assistant": + item["content"] = item["content"] + content break sentences, self.sentence_fragment = parse_sentences( - self.sentence_fragment, content) + self.sentence_fragment, content + ) for s in sentences: - self.send_text_output(ten_env, s, False) + self.send_text_output(async_ten_env, s, False) - async def handle_content_finished(full_content: str): + async def handle_content_finished(_: str): # Wait for the single tool task to complete (if any) if self.tool_task_future: await self.tool_task_future @@ -245,39 +280,42 @@ async def handle_content_finished(full_content: str): listener.on("content_finished", handle_content_finished) # Make an async API call to get chat completions - await self.client.get_chat_completions_stream(memory + [message], tools, listener) + await self.client.get_chat_completions_stream( + memory + [message], tools, listener + ) # Wait for the content to be finished await content_finished_event.wait() - ten_env.log_info( - f"Chat completion finished for input text: {message}") + async_ten_env.log_info( + f"Chat completion finished for input text: {message}" + ) except asyncio.CancelledError: - ten_env.log_info(f"Task cancelled: {message}") - except Exception as e: - ten_env.log_error( - f"Error 
in chat_completion: {traceback.format_exc()} for input text: {message}") + async_ten_env.log_info(f"Task cancelled: {message}") + except Exception: + async_ten_env.log_error( + f"Error in chat_completion: {traceback.format_exc()} for input text: {message}" + ) finally: - self.send_text_output(ten_env, "", True) + self.send_text_output(async_ten_env, "", True) # always append the memory for m in self.memory_cache: self._append_memory(m) - def _convert_to_content_parts(self, content: Iterable[LLMChatCompletionContentPartParam]): + def _convert_to_content_parts( + self, content: Iterable[LLMChatCompletionContentPartParam] + ): content_parts = [] if isinstance(content, str): - content_parts.append({ - "type": "text", - "text": content - }) + content_parts.append({"type": "text", "text": content}) else: for part in content: content_parts.append(part) return content_parts def _convert_tools_to_dict(self, tool: LLMToolMetadata): - json = { + json_dict = { "type": "function", "function": { "name": tool.name, @@ -286,21 +324,21 @@ def _convert_tools_to_dict(self, tool: LLMToolMetadata): "type": "object", "properties": {}, "required": [], - "additionalProperties": False + "additionalProperties": False, }, }, - "strict": True + "strict": True, } for param in tool.parameters: - json["function"]["parameters"]["properties"][param.name] = { + json_dict["function"]["parameters"]["properties"][param.name] = { "type": param.type, - "description": param.description + "description": param.description, } if param.required: - json["function"]["parameters"]["required"].append(param.name) + json_dict["function"]["parameters"]["required"].append(param.name) - return json + return json_dict def message_to_dict(self, message: LLMChatCompletionMessageParam): if isinstance(message["content"], str): diff --git a/agents/ten_packages/extension/openai_chatgpt_python/helper.py b/agents/ten_packages/extension/openai_chatgpt_python/helper.py index 749916c0..4c1cb3a6 100644 --- a/agents/ten_packages/extension/openai_chatgpt_python/helper.py +++ b/agents/ten_packages/extension/openai_chatgpt_python/helper.py @@ -5,15 +5,12 @@ # Copyright (c) 2024 Agora IO. All rights reserved. # # -import asyncio -from collections import deque -from ten.data import Data -from .log import logger from PIL import Image from datetime import datetime from io import BytesIO from base64 import b64encode + def get_current_time(): # Get the current time start_time = datetime.now() @@ -44,7 +41,6 @@ def parse_sentences(sentence_fragment, content): return sentences, remain - def rgb2base64jpeg(rgb_data, width, height): # Convert the RGB image to a PIL Image pil_image = Image.frombytes("RGBA", (width, height), bytes(rgb_data)) @@ -100,4 +96,4 @@ def resize_image_keep_aspect(image, max_size=512): # Resize the image with the new dimensions resized_image = image.resize((new_width, new_height)) - return resized_image \ No newline at end of file + return resized_image diff --git a/agents/ten_packages/extension/openai_chatgpt_python/log.py b/agents/ten_packages/extension/openai_chatgpt_python/log.py deleted file mode 100644 index 1813e965..00000000 --- a/agents/ten_packages/extension/openai_chatgpt_python/log.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# -# Agora Real Time Engagement -# Created by Wei Hu in 2024-08. -# Copyright (c) 2024 Agora IO. All rights reserved. 
-#
-#
-import logging
-
-logger = logging.getLogger("openai_chatgpt_python")
-logger.setLevel(logging.INFO)
-
-formatter_str = (
-    "%(asctime)s - %(name)s - %(levelname)s - %(process)d - "
-    "[%(filename)s:%(lineno)d] - %(message)s"
-)
-formatter = logging.Formatter(formatter_str)
-
-console_handler = logging.StreamHandler()
-console_handler.setFormatter(formatter)
-
-logger.addHandler(console_handler)
diff --git a/agents/ten_packages/extension/openai_chatgpt_python/manifest.json b/agents/ten_packages/extension/openai_chatgpt_python/manifest.json
index 16fb38d6..f71d0d76 100644
--- a/agents/ten_packages/extension/openai_chatgpt_python/manifest.json
+++ b/agents/ten_packages/extension/openai_chatgpt_python/manifest.json
@@ -6,7 +6,7 @@
     {
       "type": "system",
       "name": "ten_runtime_python",
-      "version": "0.4"
+      "version": "0.6"
     }
   ],
   "package": {
@@ -102,21 +102,30 @@
       {
         "name": "tool_register",
         "property": {
-          "name": {
-            "type": "string"
-          },
-          "description": {
-            "type": "string"
-          },
-          "parameters": {
-            "type": "string"
+          "tool": {
+            "type": "object",
+            "properties": {
+              "name": {
+                "type": "string"
+              },
+              "description": {
+                "type": "string"
+              },
+              "parameters": {
+                "type": "array",
+                "items": {
+                  "type": "object",
+                  "properties": {}
+                }
+              }
+            },
+            "required": [
+              "name",
+              "description",
+              "parameters"
+            ]
           }
         },
-        "required": [
-          "name",
-          "description",
-          "parameters"
-        ],
         "result": {
           "property": {
             "response": {
diff --git a/agents/ten_packages/extension/openai_chatgpt_python/openai.py b/agents/ten_packages/extension/openai_chatgpt_python/openai.py
index 5ac24e06..8c4845ea 100644
--- a/agents/ten_packages/extension/openai_chatgpt_python/openai.py
+++ b/agents/ten_packages/extension/openai_chatgpt_python/openai.py
@@ -11,7 +11,6 @@
 import requests
 from openai import AsyncOpenAI, AsyncAzureOpenAI
 from openai.types.chat.chat_completion import ChatCompletion
-from typing import List, Dict, Any, Literal, Optional, Union
 
 from ten.async_ten_env import AsyncTenEnv
 from ten_ai_base.config import BaseConfig
@@ -21,8 +20,12 @@ class OpenAIChatGPTConfig(BaseConfig):
     api_key: str = ""
     base_url: str = "https://api.openai.com/v1"
-    model: str = "gpt-4o"  # Adjust this to match the equivalent of `openai.GPT4o` in the Python library
-    prompt: str = "You are a voice assistant who talks in a conversational way and can chat with me like my friends. I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. When giving me a list of options, express the options in a narrative way instead of bullet points."
+    model: str = (
+        "gpt-4o"  # Adjust this to match the equivalent of `openai.GPT4o` in the Python library
+    )
+    prompt: str = (
+        "You are a voice assistant who talks in a conversational way and can chat with me like my friends. I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. When giving me a list of options, express the options in a narrative way instead of bullet points."
+    )
     frequency_penalty: float = 0.9
     presence_penalty: float = 0.9
     top_p: float = 1.0
@@ -39,21 +42,21 @@ class OpenAIChatGPTConfig(BaseConfig):
 
 class OpenAIChatGPT:
     client = None
-    def __init__(self, ten_env:AsyncTenEnv, config: OpenAIChatGPTConfig):
+
+    def __init__(self, ten_env: AsyncTenEnv, config: OpenAIChatGPTConfig):
         self.config = config
         ten_env.log_info(f"OpenAIChatGPT initialized with config: {config.api_key}")
         if self.config.vendor == "azure":
             self.client = AsyncAzureOpenAI(
                 api_key=config.api_key,
                 api_version=self.config.azure_api_version,
-                azure_endpoint=config.azure_endpoint
+                azure_endpoint=config.azure_endpoint,
             )
-            ten_env.log_info(f"Using Azure OpenAI with endpoint: {config.azure_endpoint}, api_version: {config.azure_api_version}")
-        else:
-            self.client = AsyncOpenAI(
-                api_key=config.api_key,
-                base_url=config.base_url
+            ten_env.log_info(
+                f"Using Azure OpenAI with endpoint: {config.azure_endpoint}, api_version: {config.azure_api_version}"
             )
+        else:
+            self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
         self.session = requests.Session()
         if config.proxy_url:
             proxies = {
@@ -64,7 +67,7 @@ def __init__(self, ten_env:AsyncTenEnv, config: OpenAIChatGPTConfig):
             self.session.proxies.update(proxies)
         self.client.session = self.session
 
-    async def get_chat_completions(self, messages, tools = None) -> ChatCompletion:
+    async def get_chat_completions(self, messages, tools=None) -> ChatCompletion:
         req = {
             "model": self.config.model,
             "messages": [
@@ -86,11 +89,11 @@ async def get_chat_completions(self, messages, tools = None) -> ChatCompletion:
         try:
             response = await self.client.chat.completions.create(**req)
         except Exception as e:
-            raise Exception(f"CreateChatCompletion failed, err: {e}")
+            raise RuntimeError(f"CreateChatCompletion failed, err: {e}") from e
 
         return response
 
-    async def get_chat_completions_stream(self, messages, tools = None, listener = None):
+    async def get_chat_completions_stream(self, messages, tools=None, listener=None):
         req = {
             "model": self.config.model,
             "messages": [
@@ -113,11 +116,17 @@ async def get_chat_completions_stream(self, messages, tools = None, listener = N
         try:
             response = await self.client.chat.completions.create(**req)
         except Exception as e:
-            raise Exception(f"CreateChatCompletionStream failed, err: {e}")
-
+            raise RuntimeError(f"CreateChatCompletionStream failed, err: {e}") from e
+
         full_content = ""
         # Check for tool calls
-        tool_calls_dict = defaultdict(lambda: {"id": None, "function": {"arguments": "", "name": None}, "type": None})
+        tool_calls_dict = defaultdict(
+            lambda: {
+                "id": None,
+                "function": {"arguments": "", "name": None},
+                "type": None,
+            }
+        )
 
         async for chat_completion in response:
             if len(chat_completion.choices) == 0:
@@ -129,7 +138,7 @@ async def get_chat_completions_stream(self, messages, tools = None, listener = N
 
             # Emit content update event (fire-and-forget)
             if listener and content:
-                listener.emit('content_update', content)
+                listener.emit("content_update", content)
 
             full_content += content
 
@@ -140,24 +149,27 @@ async def get_chat_completions_stream(self, messages, tools = None, listener = N
 
                     # If the function name is not None, set it
                     if tool_call.function.name is not None:
-                        tool_calls_dict[tool_call.index]["function"]["name"] = tool_call.function.name
+                        tool_calls_dict[tool_call.index]["function"][
+                            "name"
+                        ] = tool_call.function.name
 
                     # Append the arguments
-                    tool_calls_dict[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
+                    tool_calls_dict[tool_call.index]["function"][
+                        "arguments"
+                    ] += tool_call.function.arguments
 
                     # If the type is not None, set it
                     if tool_call.type is not None:
                         tool_calls_dict[tool_call.index]["type"] = tool_call.type
 
-
         # Convert the dictionary to a list
         tool_calls_list = list(tool_calls_dict.values())
 
         # Emit tool calls event (fire-and-forget)
         if listener and tool_calls_list:
             for tool_call in tool_calls_list:
-                listener.emit('tool_call', tool_call)
+                listener.emit("tool_call", tool_call)
 
         # Emit content finished event after the loop completes
         if listener:
-            listener.emit('content_finished', full_content)
+            listener.emit("content_finished", full_content)
diff --git a/agents/ten_packages/extension/openai_v2v_python/BUILD.gn b/agents/ten_packages/extension/openai_v2v_python/BUILD.gn
index 1a40f3c2..10ce2da4 100644
--- a/agents/ten_packages/extension/openai_v2v_python/BUILD.gn
+++ b/agents/ten_packages/extension/openai_v2v_python/BUILD.gn
@@ -14,7 +14,6 @@ ten_package("openai_v2v_python") {
     "__init__.py",
     "addon.py",
     "extension.py",
-    "log.py",
     "manifest.json",
     "property.json",
   ]
diff --git a/agents/ten_packages/extension/openai_v2v_python/addon.py b/agents/ten_packages/extension/openai_v2v_python/addon.py
index 1bddfd2a..be3aa46a 100644
--- a/agents/ten_packages/extension/openai_v2v_python/addon.py
+++ b/agents/ten_packages/extension/openai_v2v_python/addon.py
@@ -17,5 +17,6 @@ class OpenAIRealtimeExtensionAddon(Addon):
 
     def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None:
         from .extension import OpenAIRealtimeExtension
+
         ten_env.log_info("OpenAIRealtimeExtensionAddon on_create_instance")
         ten_env.on_create_instance_done(OpenAIRealtimeExtension(name), context)
diff --git a/agents/ten_packages/extension/openai_v2v_python/extension.py b/agents/ten_packages/extension/openai_v2v_python/extension.py
index 305ac179..680769fa 100644
--- a/agents/ten_packages/extension/openai_v2v_python/extension.py
+++ b/agents/ten_packages/extension/openai_v2v_python/extension.py
@@ -7,6 +7,8 @@
 #
 import asyncio
 import base64
+import json
+from enum import Enum
 import traceback
 import time
 import numpy as np
@@ -23,22 +25,68 @@
 )
 from ten.audio_frame import AudioFrameDataFmt
 from ten_ai_base.const import CMD_PROPERTY_RESULT, CMD_TOOL_CALL
-from ten_ai_base.llm import AsyncLLMBaseExtension
-from dataclasses import dataclass, field
-from ten_ai_base import BaseConfig, ChatMemory, EVENT_MEMORY_EXPIRED, EVENT_MEMORY_APPENDED, LLMUsage, LLMCompletionTokensDetails, LLMPromptTokensDetails
-from ten_ai_base.types import LLMToolMetadata, LLMToolResult, LLMChatCompletionContentPartParam
+from ten_ai_base import AsyncLLMBaseExtension
+from dataclasses import dataclass
+from ten_ai_base.config import BaseConfig
+from ten_ai_base.chat_memory import (
+    ChatMemory,
+    EVENT_MEMORY_EXPIRED,
+    EVENT_MEMORY_APPENDED,
+)
+from ten_ai_base.usage import (
+    LLMUsage,
+    LLMCompletionTokensDetails,
+    LLMPromptTokensDetails,
+)
+from ten_ai_base.types import (
+    LLMToolMetadata,
+    LLMToolResult,
+    LLMChatCompletionContentPartParam,
+)
 from .realtime.connection import RealtimeApiConnection
-from .realtime.struct import *
+from .realtime.struct import (
+    ItemCreate,
+    SessionCreated,
+    ItemCreated,
+    UserMessageItemParam,
+    AssistantMessageItemParam,
+    ItemInputAudioTranscriptionCompleted,
+    ItemInputAudioTranscriptionFailed,
+    ResponseCreated,
+    ResponseDone,
+    ResponseAudioTranscriptDelta,
+    ResponseTextDelta,
+    ResponseAudioTranscriptDone,
+    ResponseTextDone,
+    ResponseOutputItemDone,
+    ResponseOutputItemAdded,
+    ResponseAudioDelta,
+    ResponseAudioDone,
+    InputAudioBufferSpeechStarted,
+    InputAudioBufferSpeechStopped,
+    ResponseFunctionCallArgumentsDone,
+    ErrorMessage,
+    ItemDelete,
+    ItemTruncate,
+    SessionUpdate,
+    SessionUpdateParams,
+    InputAudioTranscription,
+    ContentType,
+    FunctionCallOutputItemParam,
+    ResponseCreate,
+)
 
 CMD_IN_FLUSH = "flush"
 CMD_IN_ON_USER_JOINED = "on_user_joined"
 CMD_IN_ON_USER_LEFT = "on_user_left"
 CMD_OUT_FLUSH = "flush"
 
+
 class Role(str, Enum):
     User = "user"
     Assistant = "assistant"
 
+
 @dataclass
 class OpenAIRealtimeConfig(BaseConfig):
     base_uri: str = "wss://api.openai.com"
@@ -68,28 +116,37 @@ def build_ctx(self) -> dict:
             "model": self.model,
         }
 
-class OpenAIRealtimeExtension(AsyncLLMBaseExtension):
-    config: OpenAIRealtimeConfig = None
-    stopped: bool = False
-    connected: bool = False
-    buffer: bytearray = b''
-    memory: ChatMemory = None
-    total_usage: LLMUsage = LLMUsage()
-    users_count = 0
-
-    stream_id: int = 0
-    remote_stream_id: int = 0
-    channel_name: str = ""
-    audio_len_threshold: int = 5120
-    completion_times = []
-    connect_times = []
-    first_token_times = []
+class OpenAIRealtimeExtension(AsyncLLMBaseExtension):
 
-    buff: bytearray = b''
-    transcript: str = ""
-    ctx: dict = {}
-    input_end = time.time()
+    def __init__(self, name: str):
+        super().__init__(name)
+        self.ten_env: AsyncTenEnv = None
+        self.conn = None
+        self.session = None
+        self.session_id = None
+
+        self.config: OpenAIRealtimeConfig = None
+        self.stopped: bool = False
+        self.connected: bool = False
+        self.buffer: bytearray = b""
+        self.memory: ChatMemory = None
+        self.total_usage: LLMUsage = LLMUsage()
+        self.users_count = 0
+
+        self.stream_id: int = 0
+        self.remote_stream_id: int = 0
+        self.channel_name: str = ""
+        self.audio_len_threshold: int = 5120
+
+        self.completion_times = []
+        self.connect_times = []
+        self.first_token_times = []
+
+        self.buff: bytearray = b""
+        self.transcript: str = ""
+        self.ctx: dict = {}
+        self.input_end = time.time()
 
     async def on_init(self, ten_env: AsyncTenEnv) -> None:
         await super().on_init(ten_env)
@@ -98,10 +155,11 @@ async def on_init(self, ten_env: AsyncTenEnv) -> None:
     async def on_start(self, ten_env: AsyncTenEnv) -> None:
         await super().on_start(ten_env)
         ten_env.log_debug("on_start")
+        self.ten_env = ten_env
 
         self.loop = asyncio.get_event_loop()
 
-        self.config = OpenAIRealtimeConfig.create(ten_env=ten_env)
+        self.config = await OpenAIRealtimeConfig.create_async(ten_env=ten_env)
         ten_env.log_info(f"config: {self.config}")
 
         if not self.config.api_key:
@@ -120,35 +178,38 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None:
                     self.memory.put(i)
                 ten_env.log_info(f"on retrieve context {history}")
             except Exception as e:
-                ten_env.log_error("Failed to handle retrieve result {e}")
+                ten_env.log_error(f"Failed to handle retrieve result {e}")
         else:
             ten_env.log_warn("Failed to retrieve content")
-
+
         self.memory.on(EVENT_MEMORY_EXPIRED, self._on_memory_expired)
         self.memory.on(EVENT_MEMORY_APPENDED, self._on_memory_appended)
 
         self.ctx = self.config.build_ctx()
         self.ctx["greeting"] = self.config.greeting
-
+
         self.conn = RealtimeApiConnection(
             ten_env=ten_env,
-            base_uri=self.config.base_uri, path=self.config.path, api_key=self.config.api_key, model=self.config.model, vendor=self.config.vendor)
-        ten_env.log_info(f"Finish init client")
+            base_uri=self.config.base_uri,
+            path=self.config.path,
+            api_key=self.config.api_key,
+            model=self.config.model,
+            vendor=self.config.vendor,
+        )
+        ten_env.log_info("Finish init client")
 
             self.loop.create_task(self._loop())
         except Exception as e:
             traceback.print_exc()
             self.ten_env.log_error(f"Failed to init client {e}")
 
-        self.ten_env = ten_env
-
     async def on_stop(self, ten_env: AsyncTenEnv) -> None:
         await super().on_stop(ten_env)
         ten_env.log_info("on_stop")
 
         self.stopped = True
 
-    async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None:
+    async def on_audio_frame(self, _: AsyncTenEnv, audio_frame: AudioFrame) -> None:
         try:
             stream_id = audio_frame.get_property_int("stream_id")
             if self.channel_name == "":
@@ -193,7 +254,7 @@ async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None:
 
         cmd_result = CmdResult.create(status)
         cmd_result.set_property_string("detail", detail)
-        ten_env.return_result(cmd_result, cmd)
+        await ten_env.return_result(cmd_result, cmd)
 
     # Not support for now
     async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None:
@@ -220,7 +281,9 @@ def get_time_ms() -> int:
                     # self.ten_env.log_info(f"Received message: {message.type}")
                     match message:
                         case SessionCreated():
-                            self.ten_env.log_info(f"Session is created: {message.session}")
+                            self.ten_env.log_info(
+                                f"Session is created: {message.session}"
+                            )
                             self.session_id = message.session.id
                             self.session = message.session
                             await self._update_session()
@@ -228,9 +291,31 @@ def get_time_ms() -> int:
                                 history = self.memory.get()
                                 for h in history:
                                     if h["role"] == "user":
-                                        await self.conn.send_request(ItemCreate(item=UserMessageItemParam(content=[{"type": ContentType.InputText, "text": h["content"]}])))
+                                        await self.conn.send_request(
+                                            ItemCreate(
+                                                item=UserMessageItemParam(
+                                                    content=[
+                                                        {
+                                                            "type": ContentType.InputText,
+                                                            "text": h["content"],
+                                                        }
+                                                    ]
+                                                )
+                                            )
+                                        )
                                     elif h["role"] == "assistant":
-                                        await self.conn.send_request(ItemCreate(item=AssistantMessageItemParam(content=[{"type": ContentType.InputText, "text": h["content"]}])))
+                                        await self.conn.send_request(
+                                            ItemCreate(
+                                                item=AssistantMessageItemParam(
+                                                    content=[
+                                                        {
+                                                            "type": ContentType.InputText,
+                                                            "text": h["content"],
+                                                        }
+                                                    ]
+                                                )
+                                            )
+                                        )
                                 self.ten_env.log_info(f"Finish send history {history}")
                                 self.memory.clear()
 
@@ -238,61 +323,88 @@ def get_time_ms() -> int:
                             self.connected = True
                             await self._greeting()
                         case ItemInputAudioTranscriptionCompleted():
-                            self.ten_env.log_info(f"On request transcript {message.transcript}")
+                            self.ten_env.log_info(
+                                f"On request transcript {message.transcript}"
+                            )
                             self._send_transcript(message.transcript, Role.User, True)
-                            self.memory.put({"role": "user", "content": message.transcript, "id": message.item_id})
+                            self.memory.put(
+                                {
+                                    "role": "user",
+                                    "content": message.transcript,
+                                    "id": message.item_id,
+                                }
+                            )
                        case ItemInputAudioTranscriptionFailed():
-                            self.ten_env.log_warn(f"On request transcript failed {message.item_id} {message.error}")
+                            self.ten_env.log_warn(
+                                f"On request transcript failed {message.item_id} {message.error}"
+                            )
                        case ItemCreated():
                             self.ten_env.log_info(f"On item created {message.item}")
                        case ResponseCreated():
                             response_id = message.response.id
-                            self.ten_env.log_info(
-                                f"On response created {response_id}")
+                            self.ten_env.log_info(f"On response created {response_id}")
                        case ResponseDone():
-                            id = message.response.id
+                            msg_resp_id = message.response.id
                             status = message.response.status
-                            if id == response_id:
+                            if msg_resp_id == response_id:
                                 response_id = ""
                                self.ten_env.log_info(
-                                    f"On response done {id} {status} {message.response.usage}")
+                                    f"On response done {msg_resp_id} {status} {message.response.usage}"
+                                )
                                 if message.response.usage:
-                                    await self._update_usage(message.response.usage)
+                                    pass
+                                    # await self._update_usage(message.response.usage)
                        case ResponseAudioTranscriptDelta():
                             self.ten_env.log_info(
-                                f"On response transcript delta {message.response_id} {message.output_index} {message.content_index} {message.delta}")
+                                f"On response transcript delta {message.response_id} {message.output_index} {message.content_index} {message.delta}"
+                            )
                             if message.response_id in flushed:
                                 self.ten_env.log_warn(
-                                    f"On flushed transcript delta {message.response_id} {message.output_index} {message.content_index} {message.delta}")
+                                    f"On flushed transcript delta {message.response_id} {message.output_index} {message.content_index} {message.delta}"
+                                )
                                 continue
                             self._send_transcript(message.delta, Role.Assistant, False)
                        case ResponseTextDelta():
                             self.ten_env.log_info(
-                                f"On response text delta {message.response_id} {message.output_index} {message.content_index} {message.delta}")
+                                f"On response text delta {message.response_id} {message.output_index} {message.content_index} {message.delta}"
+                            )
                             if message.response_id in flushed:
                                 self.ten_env.log_warn(
-                                    f"On flushed text delta {message.response_id} {message.output_index} {message.content_index} {message.delta}")
+                                    f"On flushed text delta {message.response_id} {message.output_index} {message.content_index} {message.delta}"
+                                )
                                 continue
                             if item_id != message.item_id:
                                 item_id = message.item_id
-                                self.first_token_times.append(time.time() - self.input_end)
+                                self.first_token_times.append(
+                                    time.time() - self.input_end
+                                )
                             self._send_transcript(message.delta, Role.Assistant, False)
                        case ResponseAudioTranscriptDone():
                             self.ten_env.log_info(
-                                f"On response transcript done {message.output_index} {message.content_index} {message.transcript}")
+                                f"On response transcript done {message.output_index} {message.content_index} {message.transcript}"
+                            )
                             if message.response_id in flushed:
                                 self.ten_env.log_warn(
-                                    f"On flushed transcript done {message.response_id}")
+                                    f"On flushed transcript done {message.response_id}"
+                                )
                                 continue
-                            self.memory.put({"role": "assistant", "content": message.transcript, "id": message.item_id})
+                            self.memory.put(
+                                {
+                                    "role": "assistant",
+                                    "content": message.transcript,
+                                    "id": message.item_id,
+                                }
+                            )
                             self.transcript = ""
                             self._send_transcript("", Role.Assistant, True)
                        case ResponseTextDone():
                             self.ten_env.log_info(
-                                f"On response text done {message.output_index} {message.content_index} {message.text}")
+                                f"On response text done {message.output_index} {message.content_index} {message.text}"
+                            )
                             if message.response_id in flushed:
                                 self.ten_env.log_warn(
-                                    f"On flushed text done {message.response_id}")
+                                    f"On flushed text done {message.response_id}"
+                                )
                                 continue
                             self.completion_times.append(time.time() - self.input_end)
                             self.transcript = ""
@@ -301,27 +413,35 @@ def get_time_ms() -> int:
                             self.ten_env.log_info(f"Output item done {message.item}")
                        case ResponseOutputItemAdded():
                             self.ten_env.log_info(
-                                f"Output item added {message.output_index} {message.item}")
+                                f"Output item added {message.output_index} {message.item}"
+                            )
                        case ResponseAudioDelta():
                             if message.response_id in flushed:
                                 self.ten_env.log_warn(
-                                    f"On flushed audio delta {message.response_id} {message.item_id} {message.content_index}")
+                                    f"On flushed audio delta {message.response_id} {message.item_id} {message.content_index}"
+                                )
                                continue
                             if item_id != message.item_id:
                                 item_id = message.item_id
-                                self.first_token_times.append(time.time() - self.input_end)
+                                self.first_token_times.append(
+                                    time.time() - self.input_end
+                                )
                             content_index = message.content_index
-                            self._on_audio_delta(message.delta)
+                            await self._on_audio_delta(message.delta)
                        case ResponseAudioDone():
                             self.completion_times.append(time.time() - self.input_end)
                        case InputAudioBufferSpeechStarted():
                             self.ten_env.log_info(
-                                f"On server listening, in response {response_id}, last item {item_id}")
+                                f"On server listening, in response {response_id}, last item {item_id}"
+                            )
                             # Truncate the on-going audio stream
                             end_ms = get_time_ms() - relative_start_ms
                             if item_id:
                                 truncate = ItemTruncate(
-                                    item_id=item_id, content_index=content_index, audio_end_ms=end_ms)
+                                    item_id=item_id,
+                                    content_index=content_index,
+                                    audio_end_ms=end_ms,
+                                )
                                 await self.conn.send_request(truncate)
                             if self.config.server_vad:
                                 await self._flush()
@@ -337,22 +457,25 @@ def get_time_ms() -> int:
                             self.input_end = time.time()
                             relative_start_ms = get_time_ms() - message.audio_end_ms
                             self.ten_env.log_info(
-                                f"On server stop listening, {message.audio_end_ms}, relative {relative_start_ms}")
+                                f"On server stop listening, {message.audio_end_ms}, relative {relative_start_ms}"
+                            )
                        case ResponseFunctionCallArgumentsDone():
                             tool_call_id = message.call_id
                             name = message.name
                             arguments = message.arguments
                             self.ten_env.log_info(f"need to call func {name}")
-                            self.loop.create_task(self._handle_tool_call(tool_call_id, name, arguments))
+                            self.loop.create_task(
+                                self._handle_tool_call(tool_call_id, name, arguments)
+                            )
                        case ErrorMessage():
                             self.ten_env.log_error(
-                                f"Error message received: {message.error}")
+                                f"Error message received: {message.error}"
+                            )
                        case _:
                             self.ten_env.log_debug(f"Not handled message {message}")
                 except Exception as e:
                     traceback.print_exc()
-                    self.ten_env.log_error(
-                        f"Error processing message: {message} {e}")
+                    self.ten_env.log_error(f"Error processing message: {message} {e}")
 
             self.ten_env.log_info("Client loop finished")
         except Exception as e:
@@ -362,7 +485,7 @@ def get_time_ms() -> int:
         # clear so that new session can be triggered
         self.connected = False
         self.remote_stream_id = 0
-
+
         if not self.stopped:
             await self.conn.close()
             await asyncio.sleep(0.5)
@@ -370,10 +493,15 @@ def get_time_ms() -> int:
 
             self.conn = RealtimeApiConnection(
                 ten_env=self.ten_env,
-                base_uri=self.config.base_uri, path=self.config.path, api_key=self.config.api_key, model=self.config.model, vendor=self.config.vendor)
-
+                base_uri=self.config.base_uri,
+                path=self.config.path,
+                api_key=self.config.api_key,
+                model=self.config.model,
+                vendor=self.config.vendor,
+            )
+
             self.loop.create_task(self._loop())
-
+
     async def _on_memory_expired(self, message: dict) -> None:
         self.ten_env.log_info(f"Memory expired: {message}")
         item_id = message.get("item_id")
@@ -384,7 +512,7 @@ async def _on_memory_appended(self, message: dict) -> None:
         self.ten_env.log_info(f"Memory appended: {message}")
         if not self.config.enable_storage:
             return
-
+
         role = message.get("role")
         stream_id = self.remote_stream_id if role == Role.User else 0
         try:
@@ -392,7 +520,7 @@ async def _on_memory_appended(self, message: dict) -> None:
             d.set_property_string("text", message.get("content"))
             d.set_property_string("role", role)
             d.set_property_int("stream_id", stream_id)
-            self.ten_env.send_data(d)
+            asyncio.create_task(self.ten_env.send_data(d))
         except Exception as e:
             self.ten_env.log_error(f"Error send append_context data {message} {e}")
 
@@ -402,7 +530,7 @@ async def _on_audio(self, buff: bytearray):
         # Buffer audio
         if self.connected and len(self.buff) >= self.audio_len_threshold:
             await self.conn.send_audio_data(self.buff)
-            self.buff = b''
+            self.buff = b""
 
     async def _update_session(self) -> None:
         tools = []
@@ -416,20 +544,20 @@ def tool_dict(tool: LLMToolMetadata):
                     "type": "object",
                     "properties": {},
                     "required": [],
-                    "additionalProperties": False
-                }
+                    "additionalProperties": False,
+                },
             }
 
             for param in tool.parameters:
                 t["parameters"]["properties"][param.name] = {
                     "type": param.type,
-                    "description": param.description
+                    "description": param.description,
                 }
                 if param.required:
                     t["parameters"]["required"].append(param.name)
 
             return t
-
+
         if self.available_tools:
             tool_prompt = "You have several tools that you can get help from:\n"
             for t in self.available_tools:
@@ -437,39 +565,44 @@ def tool_dict(tool: LLMToolMetadata):
             self.ctx["tools"] = tool_prompt
             tools = [tool_dict(t) for t in self.available_tools]
         prompt = self._replace(self.config.prompt)
-
+
         self.ten_env.log_info(f"update session {prompt} {tools}")
-        su = SessionUpdate(session=SessionUpdateParams(
+        su = SessionUpdate(
+            session=SessionUpdateParams(
                 instructions=prompt,
                 model=self.config.model,
                 tool_choice="auto" if self.available_tools else "none",
-                tools=tools
-            ))
+                tools=tools,
+            )
+        )
         if self.config.audio_out:
-            su.session.voice=self.config.voice
+            su.session.voice = self.config.voice
         else:
-            su.session.modalities=["text"]
-
+            su.session.modalities = ["text"]
+
         if self.config.input_transcript:
-            su.session.input_audio_transcription=InputAudioTranscription(
-                model="whisper-1")
+            su.session.input_audio_transcription = InputAudioTranscription(
+                model="whisper-1"
+            )
         await self.conn.send_request(su)
-
-    async def on_tools_update(self, ten_env: AsyncTenEnv, tool: LLMToolMetadata) -> None:
+
+    async def on_tools_update(self, _: AsyncTenEnv, tool: LLMToolMetadata) -> None:
         """Called when a new tool is registered. Implement this method to process the new tool."""
         self.ten_env.log_info(f"on tools update {tool}")
-        await self._update_session()
-
+        # await self._update_session()
+
     def _replace(self, prompt: str) -> str:
         result = prompt
         for token, value in self.ctx.items():
-            result = result.replace("{"+token+"}", value)
+            result = result.replace("{" + token + "}", value)
         return result
 
     # Direction: OUT
-    def _on_audio_delta(self, delta: bytes) -> None:
+    async def _on_audio_delta(self, delta: bytes) -> None:
         audio_data = base64.b64decode(delta)
-        self.ten_env.log_debug(f"on_audio_delta audio_data len {len(audio_data)} samples {len(audio_data) // 2}")
+        self.ten_env.log_debug(
+            f"on_audio_delta audio_data len {len(audio_data)} samples {len(audio_data) // 2}"
+        )
         self._dump_audio_if_need(audio_data, Role.Assistant)
 
         f = AudioFrame.create("pcm_frame")
@@ -482,7 +615,7 @@ def _on_audio_delta(self, delta: bytes) -> None:
         buff = f.lock_buf()
         buff[:] = audio_data
         f.unlock_buf(buff)
-        self.ten_env.send_audio_frame(f)
+        await self.ten_env.send_audio_frame(f)
 
     def _send_transcript(self, content: str, role: Role, is_final: bool) -> None:
         def is_punctuation(char):
@@ -505,7 +638,13 @@ def parse_sentences(sentence_fragment, content):
             remain = current_sentence  # Any remaining characters form the incomplete sentence
             return sentences, remain
 
-        def send_data(ten_env: AsyncTenEnv, sentence: str, stream_id: int, role: str, is_final: bool):
+        def send_data(
+            ten_env: AsyncTenEnv,
+            sentence: str,
+            stream_id: int,
+            role: str,
+            is_final: bool,
+        ):
             try:
                 d = Data.create("text_data")
                 d.set_property_string("text", sentence)
@@ -513,10 +652,13 @@ def send_data(ten_env: AsyncTenEnv, sentence: str, stream_id: int, role: str, is
                 d.set_property_string("role", role)
                 d.set_property_int("stream_id", stream_id)
                 ten_env.log_info(
-                    f"send transcript text [{sentence}] stream_id {stream_id} is_final {is_final} end_of_segment {is_final} role {role}")
-                ten_env.send_data(d)
+                    f"send transcript text [{sentence}] stream_id {stream_id} is_final {is_final} end_of_segment {is_final} role {role}"
+                )
+                asyncio.create_task(ten_env.send_data(d))
             except Exception as e:
-                ten_env.log_error(f"Error send text data {role}: {sentence} {is_final} {e}")
+                ten_env.log_error(
+                    f"Error send text data {role}: {sentence} {is_final} {e}"
+                )
 
         stream_id = self.remote_stream_id if role == Role.User else 0
         try:
@@ -527,7 +669,9 @@ def send_data(ten_env: AsyncTenEnv, sentence: str, stream_id: int, role: str, is
             else:
                 send_data(self.ten_env, content, stream_id, role, is_final)
         except Exception as e:
-            self.ten_env.log_error(f"Error send text data {role}: {content} {is_final} {e}")
+            self.ten_env.log_error(
+                f"Error send text data {role}: {content} {is_final} {e}"
+            )
 
     def _dump_audio_if_need(self, buf: bytearray, role: Role) -> None:
         if not self.config.dump:
@@ -536,7 +680,9 @@ def _dump_audio_if_need(self, buf: bytearray, role: Role) -> None:
         with open("{}_{}.pcm".format(role, self.channel_name), "ab") as dump_file:
             dump_file.write(buf)
 
-    async def _handle_tool_call(self, tool_call_id: str, name: str, arguments: str) -> None:
+    async def _handle_tool_call(
+        self, tool_call_id: str, name: str, arguments: str
+    ) -> None:
         self.ten_env.log_info(f"_handle_tool_call {tool_call_id} {name} {arguments}")
         cmd: Cmd = Cmd.create(CMD_TOOL_CALL)
         cmd.set_property_string("name", name)
@@ -546,23 +692,26 @@ async def _handle_tool_call(self, tool_call_id: str, name: str, arguments: str)
         tool_response = ItemCreate(
             item=FunctionCallOutputItemParam(
                 call_id=tool_call_id,
-                output="{\"success\":false}",
+                output='{"success":false}',
             )
         )
         if result.get_status_code() == StatusCode.OK:
             tool_result: LLMToolResult = json.loads(
-                result.get_property_to_json(CMD_PROPERTY_RESULT))
-
+                result.get_property_to_json(CMD_PROPERTY_RESULT)
+            )
+
             result_content = tool_result["content"]
-            tool_response.item.output = json.dumps(self._convert_to_content_parts(result_content))
+            tool_response.item.output = json.dumps(
+                self._convert_to_content_parts(result_content)
+            )
             self.ten_env.log_info(f"tool_result: {tool_call_id} {tool_result}")
         else:
-            self.ten_env.log_error(f"Tool call failed")
-
+            self.ten_env.log_error("Tool call failed")
+
         await self.conn.send_request(tool_response)
         await self.conn.send_request(ResponseCreate())
         self.ten_env.log_info(f"_remote_tool_call finish {name} {arguments}")
-
+
     def _greeting_text(self) -> str:
         text = "Hi, there."
         if self.config.language == "zh-CN":
@@ -573,86 +722,113 @@ def _greeting_text(self) -> str:
             text = "안녕하세요"
         return text
 
-
     def _convert_tool_params_to_dict(self, tool: LLMToolMetadata):
-        json = {
-            "type": "object",
-            "properties": {},
-            "required": []
-        }
+        json_dict = {"type": "object", "properties": {}, "required": []}
         for param in tool.parameters:
-            json["properties"][param.name] = {
+            json_dict["properties"][param.name] = {
                 "type": param.type,
-                "description": param.description
+                "description": param.description,
             }
             if param.required:
-                json["required"].append(param.name)
+                json_dict["required"].append(param.name)
 
-        return json
-
-
-    def _convert_to_content_parts(self, content: Iterable[LLMChatCompletionContentPartParam]):
-        content_parts = []
+        return json_dict
 
+    def _convert_to_content_parts(
+        self, content: Iterable[LLMChatCompletionContentPartParam]
+    ):
+        content_parts = []
 
         if isinstance(content, str):
-            content_parts.append({
-                "type": "text",
-                "text": content
-            })
+            content_parts.append({"type": "text", "text": content})
         else:
             for part in content:
                 # Only text content is supported currently for v2v model
                 if part["type"] == "text":
                     content_parts.append(part)
         return content_parts
-
+
     async def _greeting(self) -> None:
         if self.connected and self.users_count == 1:
             text = self._greeting_text()
             if self.config.greeting:
                 text = "Say '" + self.config.greeting + "' to me."
self.ten_env.log_info(f"send greeting {text}") - await self.conn.send_request(ItemCreate(item=UserMessageItemParam(content=[{"type": ContentType.InputText, "text": text}]))) + await self.conn.send_request( + ItemCreate( + item=UserMessageItemParam( + content=[{"type": ContentType.InputText, "text": text}] + ) + ) + ) await self.conn.send_request(ResponseCreate()) async def _flush(self) -> None: try: c = Cmd.create("flush") await self.ten_env.send_cmd(c) - except: - self.ten_env.log_error(f"Error flush") - + except Exception: + self.ten_env.log_error("Error flush") + async def _update_usage(self, usage: dict) -> None: - self.total_usage.completion_tokens += usage.get("output_tokens") - self.total_usage.prompt_tokens += usage.get("input_tokens") - self.total_usage.total_tokens += usage.get("total_tokens") + self.total_usage.completion_tokens += usage.get("output_tokens") or 0 + self.total_usage.prompt_tokens += usage.get("input_tokens") or 0 + self.total_usage.total_tokens += usage.get("total_tokens") or 0 if not self.total_usage.completion_tokens_details: self.total_usage.completion_tokens_details = LLMCompletionTokensDetails() if not self.total_usage.prompt_tokens_details: self.total_usage.prompt_tokens_details = LLMPromptTokensDetails() if usage.get("output_token_details"): - self.total_usage.completion_tokens_details.accepted_prediction_tokens += usage["output_token_details"].get("text_tokens") - self.total_usage.completion_tokens_details.audio_tokens += usage["output_token_details"].get("audio_tokens") - + self.total_usage.completion_tokens_details.accepted_prediction_tokens += ( + usage["output_token_details"].get("text_tokens") + ) + self.total_usage.completion_tokens_details.audio_tokens += usage[ + "output_token_details" + ].get("audio_tokens") + if usage.get("input_token_details:"): - self.total_usage.prompt_tokens_details.audio_tokens += usage["input_token_details"].get("audio_tokens") - self.total_usage.prompt_tokens_details.cached_tokens += usage["input_token_details"].get("cached_tokens") - self.total_usage.prompt_tokens_details.text_tokens += usage["input_token_details"].get("text_tokens") + self.total_usage.prompt_tokens_details.audio_tokens += usage[ + "input_token_details" + ].get("audio_tokens") + self.total_usage.prompt_tokens_details.cached_tokens += usage[ + "input_token_details" + ].get("cached_tokens") + self.total_usage.prompt_tokens_details.text_tokens += usage[ + "input_token_details" + ].get("text_tokens") self.ten_env.log_info(f"total usage: {self.total_usage}") data = Data.create("llm_stat") data.set_property_from_json("usage", json.dumps(self.total_usage.model_dump())) if self.connect_times and self.completion_times and self.first_token_times: - data.set_property_from_json("latency", json.dumps({ - "connection_latency_95": np.percentile(self.connect_times, 95), - "completion_latency_95": np.percentile(self.completion_times, 95), - "first_token_latency_95": np.percentile(self.first_token_times, 95), - "connection_latency_99": np.percentile(self.connect_times, 99), - "completion_latency_99": np.percentile(self.completion_times, 99), - "first_token_latency_99": np.percentile(self.first_token_times, 99) - })) - self.ten_env.send_data(data) + data.set_property_from_json( + "latency", + json.dumps( + { + "connection_latency_95": np.percentile(self.connect_times, 95), + "completion_latency_95": np.percentile( + self.completion_times, 95 + ), + "first_token_latency_95": np.percentile( + self.first_token_times, 95 + ), + "connection_latency_99": 
np.percentile(self.connect_times, 99), + "completion_latency_99": np.percentile( + self.completion_times, 99 + ), + "first_token_latency_99": np.percentile( + self.first_token_times, 99 + ), + } + ), + ) + asyncio.create_task(self.ten_env.send_data(data)) + + async def on_call_chat_completion(self, async_ten_env, **kargs): + raise NotImplementedError + + async def on_data_chat_completion(self, async_ten_env, **kargs): + raise NotImplementedError diff --git a/agents/ten_packages/extension/openai_v2v_python/manifest.json b/agents/ten_packages/extension/openai_v2v_python/manifest.json index 264b8b8e..a2a957fa 100644 --- a/agents/ten_packages/extension/openai_v2v_python/manifest.json +++ b/agents/ten_packages/extension/openai_v2v_python/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { @@ -113,21 +113,30 @@ { "name": "tool_register", "property": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "string" + "tool": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "required": [ + "name", + "description", + "parameters" + ] } }, - "required": [ - "name", - "description", - "parameters" - ], "result": { "property": { "response": { diff --git a/agents/ten_packages/extension/polly_tts/BUILD.gn b/agents/ten_packages/extension/polly_tts/BUILD.gn index 16a3f9f3..ddc17463 100644 --- a/agents/ten_packages/extension/polly_tts/BUILD.gn +++ b/agents/ten_packages/extension/polly_tts/BUILD.gn @@ -14,6 +14,5 @@ ten_package("polly_tts") { "extension.py", "manifest.json", "property.json", - "tests", ] } \ No newline at end of file diff --git a/agents/ten_packages/extension/polly_tts/extension.py b/agents/ten_packages/extension/polly_tts/extension.py index 52b68872..ad8a659c 100644 --- a/agents/ten_packages/extension/polly_tts/extension.py +++ b/agents/ten_packages/extension/polly_tts/extension.py @@ -4,6 +4,7 @@ from ten import ( AsyncTenEnv, ) + PROPERTY_REGION = "region" # Optional PROPERTY_ACCESS_KEY = "access_key" # Optional PROPERTY_SECRET_KEY = "secret_key" # Optional @@ -12,6 +13,7 @@ PROPERTY_SAMPLE_RATE = "sample_rate" # Optional PROPERTY_LANG_CODE = "lang_code" # Optional + class PollyTTSExtension(AsyncTTSBaseExtension): def __init__(self, name: str): super().__init__(name) @@ -26,13 +28,13 @@ async def on_start(self, ten_env: AsyncTenEnv) -> None: try: await super().on_start(ten_env) ten_env.log_debug("on_start") - self.config = PollyTTSConfig.create(ten_env=ten_env) + self.config = await PollyTTSConfig.create_async(ten_env=ten_env) if not self.config.access_key or not self.config.secret_key: raise ValueError("access_key and secret_key are required") - + self.client = PollyTTS(self.config, ten_env) - except Exception as err: + except Exception: ten_env.log_error(f"on_start failed: {traceback.format_exc()}") async def on_stop(self, ten_env: AsyncTenEnv) -> None: @@ -45,13 +47,17 @@ async def on_deinit(self, ten_env: AsyncTenEnv) -> None: await super().on_deinit(ten_env) ten_env.log_debug("on_deinit") - async def on_request_tts(self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool) -> None: + async def on_request_tts( + self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool + ) -> None: try: data = self.client.text_to_speech_stream(ten_env, input_text) async for frame 
in data: - self.send_audio_out(ten_env, frame, sample_rate=self.client.config.sample_rate) - except Exception as err: + await self.send_audio_out( + ten_env, frame, sample_rate=self.client.config.sample_rate + ) + except Exception: ten_env.log_error(f"on_request_tts failed: {traceback.format_exc()}") async def on_cancel_tts(self, ten_env: AsyncTenEnv) -> None: - return await super().on_cancel_tts(ten_env) \ No newline at end of file + return await super().on_cancel_tts(ten_env) diff --git a/agents/ten_packages/extension/polly_tts/manifest.json b/agents/ten_packages/extension/polly_tts/manifest.json index 30c3d478..74181c21 100644 --- a/agents/ten_packages/extension/polly_tts/manifest.json +++ b/agents/ten_packages/extension/polly_tts/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "polly_tts", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/polly_tts/polly_tts.py b/agents/ten_packages/extension/polly_tts/polly_tts.py index 760e3983..26a5073f 100644 --- a/agents/ten_packages/extension/polly_tts/polly_tts.py +++ b/agents/ten_packages/extension/polly_tts/polly_tts.py @@ -8,19 +8,23 @@ from botocore.exceptions import ClientError from contextlib import closing + @dataclass class PollyTTSConfig(BaseConfig): region: str = "us-east-1" access_key: str = "" secret_key: str = "" engine: str = "generative" - voice: str = "Matthew" # https://docs.aws.amazon.com/polly/latest/dg/available-voices.html + voice: str = ( + "Matthew" # https://docs.aws.amazon.com/polly/latest/dg/available-voices.html + ) sample_rate: int = 16000 - lang_code: str = 'en-US' + lang_code: str = "en-US" bytes_per_sample: int = 2 include_visemes: bool = False number_of_channels: int = 1 - audio_format: str = 'pcm' + audio_format: str = "pcm" + class PollyTTS: def __init__(self, config: PollyTTSConfig, ten_env: AsyncTenEnv) -> None: @@ -30,12 +34,14 @@ def __init__(self, config: PollyTTSConfig, ten_env: AsyncTenEnv) -> None: ten_env.log_info("startinit polly tts") self.config = config if config.access_key and config.secret_key: - self.client = boto3.client(service_name='polly', - region_name=config.region, - aws_access_key_id=config.access_key, - aws_secret_access_key=config.secret_key) + self.client = boto3.client( + service_name="polly", + region_name=config.region, + aws_access_key_id=config.access_key, + aws_secret_access_key=config.secret_key, + ) else: - self.client = boto3.client(service_name='polly', region_name=config.region) + self.client = boto3.client(service_name="polly", region_name=config.region) self.voice_metadata = None self.frame_size = int( @@ -81,14 +87,16 @@ def _synthesize(self, text, ten_env: AsyncTenEnv): else: return audio_stream, visemes - async def text_to_speech_stream(self, ten_env: AsyncTenEnv, text: str) -> AsyncIterator[bytes]: + async def text_to_speech_stream( + self, ten_env: AsyncTenEnv, text: str + ) -> AsyncIterator[bytes]: inputText = text if len(inputText) == 0: - ten_env.log_warning("async_polly_handler: empty input detected.") + ten_env.log_warning("async_polly_handler: empty input detected.") try: - audio_stream, visemes = self._synthesize(inputText, ten_env) + audio_stream, _ = self._synthesize(inputText, ten_env) with closing(audio_stream) as stream: - for chunk in stream.iter_chunks(chunk_size=self.frame_size): - yield chunk - except Exception as e: - ten_env.log_error(traceback.format_exc()) \ No newline at end 
of file + for chunk in stream.iter_chunks(chunk_size=self.frame_size): + yield chunk + except Exception: + ten_env.log_error(traceback.format_exc()) diff --git a/agents/ten_packages/extension/qwen_llm_python/__init__.py b/agents/ten_packages/extension/qwen_llm_python/__init__.py index 3d3b6a9c..43f1c856 100644 --- a/agents/ten_packages/extension/qwen_llm_python/__init__.py +++ b/agents/ten_packages/extension/qwen_llm_python/__init__.py @@ -1,4 +1 @@ from . import qwen_llm_addon -from .log import logger - -logger.info("qwen_llm_python extension loaded") diff --git a/agents/ten_packages/extension/qwen_llm_python/log.py b/agents/ten_packages/extension/qwen_llm_python/log.py deleted file mode 100644 index 98c23228..00000000 --- a/agents/ten_packages/extension/qwen_llm_python/log.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -logger = logging.getLogger("qwen_llm_python") -logger.setLevel(logging.INFO) - -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" -) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/qwen_llm_python/manifest.json b/agents/ten_packages/extension/qwen_llm_python/manifest.json index 1227bd78..732da271 100644 --- a/agents/ten_packages/extension/qwen_llm_python/manifest.json +++ b/agents/ten_packages/extension/qwen_llm_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "qwen_llm_python", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/qwen_llm_python/qwen_llm_addon.py b/agents/ten_packages/extension/qwen_llm_python/qwen_llm_addon.py index 7fe49cbd..cb7201cf 100644 --- a/agents/ten_packages/extension/qwen_llm_python/qwen_llm_addon.py +++ b/agents/ten_packages/extension/qwen_llm_python/qwen_llm_addon.py @@ -15,10 +15,8 @@ @register_addon_as_extension("qwen_llm_python") class QWenLLMExtensionAddon(Addon): def on_create_instance(self, ten: TenEnv, addon_name: str, context): - from .log import logger - logger.info("on_create_instance") - from .qwen_llm_extension import QWenLLMExtension + ten.log_info("on_create_instance") ten.on_create_instance_done(QWenLLMExtension(addon_name), context) diff --git a/agents/ten_packages/extension/qwen_llm_python/qwen_llm_extension.py b/agents/ten_packages/extension/qwen_llm_python/qwen_llm_extension.py index 06905e59..94d9211f 100644 --- a/agents/ten_packages/extension/qwen_llm_python/qwen_llm_extension.py +++ b/agents/ten_packages/extension/qwen_llm_python/qwen_llm_extension.py @@ -21,11 +21,11 @@ import threading import re from http import HTTPStatus -from .log import logger DATA_OUT_TEXT_DATA_PROPERTY_TEXT = "text" DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT = "end_of_segment" + class QWenLLMExtension(Extension): def __init__(self, name: str): super().__init__(name) @@ -104,10 +104,8 @@ def callback(text: str, end_of_segment: bool): nonlocal curr_ttfs if curr_ttfs is None: curr_ttfs = datetime.now() - start_time - logger.info( - "TTFS {}ms, sentence {} end_of_segment {}".format( - int(curr_ttfs.total_seconds() * 1000), text, end_of_segment - ) + ten.log_info( + f"TTFS {int(curr_ttfs.total_seconds() * 1000)}ms, sentence {text} end_of_segment {end_of_segment}" ) cmd_result = CmdResult.create(StatusCode.OK) @@ -116,7 +114,7 @@ def callback(text: str, end_of_segment: 
bool): cmd_result.set_is_final(True) # end of streaming return else: cmd_result.set_is_final(False) # keep streaming return - logger.info("call_chat cmd return_result {}".format(cmd_result.to_json())) + ten.log_info(f"call_chat cmd return_result {cmd_result.to_json()}") ten.return_result(cmd_result, cmd) messages_str = cmd.get_property_string("messages") @@ -124,8 +122,8 @@ def callback(text: str, end_of_segment: bool): stream = False try: stream = cmd.get_property_bool("stream") - except Exception as e: - logger.warning("stream property not found, default to False") + except Exception: + ten.log_warn("stream property not found, default to False") if stream: self.stream_chat(ts, messages, callback) @@ -134,10 +132,11 @@ def callback(text: str, end_of_segment: bool): callback(total, True) # callback once until full answer returned def stream_chat(self, ts: datetime.time, messages: List[Any], callback): - logger.info("before stream_chat call {} {}".format(messages, ts)) + ten = self.ten + ten.log_info(f"before stream_chat call {messages} {ts}") if self.need_interrupt(ts): - logger.warning("out of date, %s, %s", self.get_outdate_ts(), ts) + ten.log_warn(f"out of date, {self.get_outdate_ts()}, {ts}") return responses = dashscope.Generation.call( @@ -152,7 +151,7 @@ def stream_chat(self, ts: datetime.time, messages: List[Any], callback): partial = "" for response in responses: if self.need_interrupt(ts): - logger.warning("out of date, %s, %s", self.get_outdate_ts(), ts) + ten.log_warn(f"out of date, {self.get_outdate_ts()}, {ts}") partial = "" # discard not sent break if response.status_code == HTTPStatus.OK: @@ -170,24 +169,19 @@ def stream_chat(self, ts: datetime.time, messages: List[Any], callback): callback(sentence, False) else: - logger.warning( - "request_id: {}, status_code: {}, error code: {}, error message: {}".format( - response.request_id, - response.status_code, - response.code, - response.message, - ) + ten.log_warn( + f"request_id: {response.request_id}, status_code: {response.status_code}, error code: {response.code}, error message: {response.message}" ) break # always send end_of_segment if callback is not None: callback(partial, True) - logger.info("stream_chat full_answer {}".format(total)) + ten.log_info(f"stream_chat full_answer {total}") return total def on_start(self, ten: TenEnv) -> None: - logger.info("on_start") + ten.log_info("on_start") self.api_key = ten.get_property_string("api_key") self.model = ten.get_property_string("model") self.prompt = ten.get_property_string("prompt") @@ -197,13 +191,16 @@ def on_start(self, ten: TenEnv) -> None: if greeting: try: output_data = Data.create("text_data") - output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) - output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True + ) ten.send_data(output_data) - logger.info(f"greeting [{greeting}] sent") + ten.log_info(f"greeting [{greeting}] sent") except Exception as e: - logger.error(f"greeting [{greeting}] send failed, err: {e}") - + ten.log_error(f"greeting [{greeting}] send failed, err: {e}") dashscope.api_key = self.api_key self.thread = threading.Thread(target=self.async_handle, args=[ten]) @@ -211,7 +208,7 @@ def on_start(self, ten: TenEnv) -> None: ten.on_start_done() def on_stop(self, ten: TenEnv) -> None: - logger.info("on_stop") +
ten.log_info("on_stop") self.stopped = True self.flush() self.queue.put(None) @@ -228,19 +225,19 @@ def flush(self): self.queue.get() def on_data(self, ten: TenEnv, data: Data) -> None: - logger.info("on_data") + ten.log_info("on_data") is_final = data.get_property_bool("is_final") if not is_final: - logger.info("ignore non final") + ten.log_info("ignore non final") return input_text = data.get_property_string("text") if len(input_text) == 0: - logger.info("ignore empty text") + ten.log_info("ignore empty text") return ts = datetime.now() - logger.info("on data %s, %s", input_text, ts) + ten.log_info("on data %s, %s", input_text, ts) self.queue.put((input_text, ts)) def async_handle(self, ten: TenEnv): @@ -249,36 +246,36 @@ def async_handle(self, ten: TenEnv): value = self.queue.get() if value is None: break - input, ts = value + chat_input, ts = value if self.need_interrupt(ts): continue - if isinstance(input, str): - logger.info("fetched from queue {}".format(input)) - self.complete_with_history(ten, ts, input) + if isinstance(chat_input, str): + ten.log_info(f"fetched from queue {chat_input}") + self.complete_with_history(ten, ts, chat_input) else: - logger.info("fetched from queue {}".format(input.get_name())) - self.call_chat(ten, ts, input) + ten.log_info(f"fetched from queue {chat_input.get_name()}") + self.call_chat(ten, ts, chat_input) except Exception as e: - logger.exception(e) + ten.log_error(str(e)) def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: ts = datetime.now() cmd_name = cmd.get_name() - logger.info("on_cmd {}, {}".format(cmd_name, ts)) + ten.log_info(f"on_cmd {cmd_name}, {ts}") if cmd_name == "flush": self.flush() cmd_out = Cmd.create("flush") ten.send_cmd( cmd_out, - lambda ten, result: logger.info("send_cmd flush done"), + lambda ten, result: ten.log_info("send_cmd flush done"), ) elif cmd_name == "call_chat": self.queue.put((cmd, ts)) return # cmd_result will be returned once it's processed else: - logger.info("unknown cmd {}".format(cmd_name)) + ten.log_info(f"unknown cmd {cmd_name}") cmd_result = CmdResult.create(StatusCode.OK) ten.return_result(cmd_result, cmd) diff --git a/agents/ten_packages/extension/transcribe_asr_python/__init__.py b/agents/ten_packages/extension/transcribe_asr_python/__init__.py index 35742868..61ab1b45 100644 --- a/agents/ten_packages/extension/transcribe_asr_python/__init__.py +++ b/agents/ten_packages/extension/transcribe_asr_python/__init__.py @@ -1,5 +1 @@ from . 
import transcribe_asr_addon -from .extension import EXTENSION_NAME -from .log import logger - -logger.info(f"{EXTENSION_NAME} extension loaded") diff --git a/agents/ten_packages/extension/transcribe_asr_python/extension.py b/agents/ten_packages/extension/transcribe_asr_python/extension.py deleted file mode 100644 index c7953418..00000000 --- a/agents/ten_packages/extension/transcribe_asr_python/extension.py +++ /dev/null @@ -1 +0,0 @@ -EXTENSION_NAME = "transcribe_asr_python" diff --git a/agents/ten_packages/extension/transcribe_asr_python/log.py b/agents/ten_packages/extension/transcribe_asr_python/log.py deleted file mode 100644 index 032573f4..00000000 --- a/agents/ten_packages/extension/transcribe_asr_python/log.py +++ /dev/null @@ -1,14 +0,0 @@ -import logging -from .extension import EXTENSION_NAME - -logger = logging.getLogger(EXTENSION_NAME) -logger.setLevel(logging.INFO) - -formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" -) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/transcribe_asr_python/manifest.json b/agents/ten_packages/extension/transcribe_asr_python/manifest.json index 95403cc8..810b2bb2 100644 --- a/agents/ten_packages/extension/transcribe_asr_python/manifest.json +++ b/agents/ten_packages/extension/transcribe_asr_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "transcribe_asr_python", - "version": "0.4.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": { diff --git a/agents/ten_packages/extension/transcribe_asr_python/transcribe_asr_addon.py b/agents/ten_packages/extension/transcribe_asr_python/transcribe_asr_addon.py index ad9c5391..80ee94b4 100644 --- a/agents/ten_packages/extension/transcribe_asr_python/transcribe_asr_addon.py +++ b/agents/ten_packages/extension/transcribe_asr_python/transcribe_asr_addon.py @@ -3,13 +3,11 @@ register_addon_as_extension, TenEnv, ) -from .extension import EXTENSION_NAME -@register_addon_as_extension(EXTENSION_NAME) +@register_addon_as_extension("transcribe_asr_python") class TranscribeAsrExtensionAddon(Addon): def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: - from .log import logger from .transcribe_asr_extension import TranscribeAsrExtension - logger.info("on_create_instance") + ten.log_info("on_create_instance") ten.on_create_instance_done(TranscribeAsrExtension(addon_name), context) diff --git a/agents/ten_packages/extension/transcribe_asr_python/transcribe_asr_extension.py b/agents/ten_packages/extension/transcribe_asr_python/transcribe_asr_extension.py index fb0307ee..c2d17cfd 100644 --- a/agents/ten_packages/extension/transcribe_asr_python/transcribe_asr_extension.py +++ b/agents/ten_packages/extension/transcribe_asr_python/transcribe_asr_extension.py @@ -10,7 +10,6 @@ import asyncio import threading -from .log import logger from .transcribe_wrapper import AsyncTranscribeWrapper, TranscribeConfig PROPERTY_REGION = "region" # Optional @@ -33,7 +32,7 @@ def __init__(self, name: str): asyncio.set_event_loop(self.loop) def on_start(self, ten: TenEnv) -> None: - logger.info("TranscribeAsrExtension on_start") + ten.log_info("TranscribeAsrExtension on_start") transcribe_config = TranscribeConfig.default_config() @@ -49,7 +48,7 @@ def on_start(self, ten: TenEnv) -> None: if value: 
transcribe_config.__setattr__(optional_param, value) except Exception as err: - logger.debug( + ten.log_debug( f"GetProperty optional {optional_param} failed, err: {err}. Using default value: {transcribe_config.__getattribute__(optional_param)}" ) @@ -57,30 +56,30 @@ def on_start(self, ten: TenEnv) -> None: transcribe_config, self.queue, ten, self.loop ) - logger.info("Starting async_transcribe_wrapper thread") + ten.log_info("Starting async_transcribe_wrapper thread") self.thread = threading.Thread(target=self.transcribe.run, args=[]) self.thread.start() ten.on_start_done() - def put_pcm_frame(self, pcm_frame: AudioFrame) -> None: + def put_pcm_frame(self, ten: TenEnv, pcm_frame: AudioFrame) -> None: try: asyncio.run_coroutine_threadsafe( self.queue.put(pcm_frame), self.loop ).result(timeout=0.1) except asyncio.QueueFull: - logger.exception("Queue is full, dropping frame") + ten.log_error("Queue is full, dropping frame") except Exception as e: - logger.exception(f"Error putting frame in queue: {e}") + ten.log_error(f"Error putting frame in queue: {e}") def on_audio_frame(self, ten: TenEnv, frame: AudioFrame) -> None: - self.put_pcm_frame(pcm_frame=frame) + self.put_pcm_frame(ten, pcm_frame=frame) def on_stop(self, ten: TenEnv) -> None: - logger.info("TranscribeAsrExtension on_stop") + ten.log_info("TranscribeAsrExtension on_stop") # put an empty frame to stop transcribe_wrapper - self.put_pcm_frame(None) + self.put_pcm_frame(ten, None) self.stopped = True self.thread.join() self.loop.stop() @@ -89,12 +88,12 @@ def on_stop(self, ten: TenEnv) -> None: ten.on_stop_done() def on_cmd(self, ten: TenEnv, cmd: Cmd) -> None: - logger.info("TranscribeAsrExtension on_cmd") + ten.log_info("TranscribeAsrExtension on_cmd") cmd_json = cmd.to_json() - logger.info("TranscribeAsrExtension on_cmd json: " + cmd_json) + ten.log_info(f"TranscribeAsrExtension on_cmd json: {cmd_json}") cmdName = cmd.get_name() - logger.info("got cmd %s" % cmdName) + ten.log_info(f"got cmd {cmdName}") cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") diff --git a/agents/ten_packages/extension/transcribe_asr_python/transcribe_wrapper.py b/agents/ten_packages/extension/transcribe_asr_python/transcribe_wrapper.py index c6b500ba..1a436d97 100644 --- a/agents/ten_packages/extension/transcribe_asr_python/transcribe_wrapper.py +++ b/agents/ten_packages/extension/transcribe_asr_python/transcribe_wrapper.py @@ -1,22 +1,22 @@ -from typing import Union import asyncio -from ten import ( - TenEnv, - Data -) +from ten import TenEnv, Data from amazon_transcribe.auth import StaticCredentialResolver from amazon_transcribe.client import TranscribeStreamingClient from amazon_transcribe.handlers import TranscriptResultStreamHandler -from amazon_transcribe.model import TranscriptEvent, TranscriptResultStream, StartStreamTranscriptionEventStream +from amazon_transcribe.model import ( + TranscriptEvent, + TranscriptResultStream, + StartStreamTranscriptionEventStream, +) -from .log import logger from .transcribe_config import TranscribeConfig DATA_OUT_TEXT_DATA_PROPERTY_TEXT = "text" DATA_OUT_TEXT_DATA_PROPERTY_IS_FINAL = "is_final" + def create_and_send_data(ten: TenEnv, text_result: str, is_final: bool): stable_data = Data.create("text_data") stable_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_IS_FINAL, is_final) @@ -24,30 +24,38 @@ def create_and_send_data(ten: TenEnv, text_result: str, is_final: bool): ten.send_data(stable_data) -class AsyncTranscribeWrapper(): - def __init__(self, config: 
TranscribeConfig, queue: asyncio.Queue, ten:TenEnv, loop: asyncio.BaseEventLoop): +class AsyncTranscribeWrapper: + def __init__( + self, + config: TranscribeConfig, + queue: asyncio.Queue, + ten: TenEnv, + loop: asyncio.BaseEventLoop, + ): self.queue = queue self.ten = ten self.stopped = False self.config = config self.loop = loop + self.stream = None + self.handler = None + self.event_handler_task = None if config.access_key and config.secret_key: - logger.info(f"init trascribe client with access key: {config.access_key}") + ten.log_info(f"init transcribe client with access key: {config.access_key}") self.transcribe_client = TranscribeStreamingClient( region=config.region, credential_resolver=StaticCredentialResolver( - access_key_id=config.access_key, - secret_access_key=config.secret_key - ) + access_key_id=config.access_key, secret_access_key=config.secret_key + ), ) else: - logger.info(f"init trascribe client without access key, using default credentials provider chain.") - - self.transcribe_client = TranscribeStreamingClient( - region=config.region + ten.log_info( + "init transcribe client without access key, using default credentials provider chain." ) + self.transcribe_client = TranscribeStreamingClient(region=config.region) + asyncio.set_event_loop(self.loop) self.reset_stream() @@ -59,11 +67,11 @@ def reset_stream(self): async def cleanup(self): if self.stream: await self.stream.input_stream.end_stream() - logger.info("cleanup: stream ended.") + self.ten.log_info("cleanup: stream ended.") if self.event_handler_task: await self.event_handler_task - logger.info("cleanup: event handler ended.") + self.ten.log_info("cleanup: event handler ended.") self.reset_stream() @@ -73,7 +81,7 @@ async def create_stream(self) -> bool: self.handler = TranscribeEventHandler(self.stream.output_stream, self.ten) self.event_handler_task = asyncio.create_task(self.handler.handle_events()) except Exception as e: - logger.exception(e) + self.ten.log_error(str(e)) return False return True @@ -84,16 +92,16 @@ async def send_frame(self) -> None: pcm_frame = await asyncio.wait_for(self.queue.get(), timeout=10.0) if pcm_frame is None: - logger.warning("send_frame: exit due to None value got.") + self.ten.log_warn("send_frame: exit due to None value got.") return frame_buf = pcm_frame.get_buf() if not frame_buf: - logger.warning("send_frame: empty pcm_frame detected.") + self.ten.log_warn("send_frame: empty pcm_frame detected.") continue if not self.stream: - logger.info("lazy init stream.") + self.ten.log_info("lazy init stream.") if not await self.create_stream(): continue @@ -102,22 +110,24 @@ async def send_frame(self) -> None: except asyncio.TimeoutError: if self.stream: await self.cleanup() - logger.debug("send_frame: no data for 10s, will close current stream and create a new one when receving new frame.") + self.ten.log_debug( + "send_frame: no data for 10s, will close current stream and create a new one when receiving new frame."
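# --- Illustrative aside (not part of the patch) ---
# A minimal sketch of the consumer-loop pattern send_frame uses above: block
# on an asyncio.Queue with a timeout, lazily open the upstream session on the
# first frame, and tear it down after 10s of silence so a fresh stream is
# created when audio resumes. The callables open_stream, send_chunk, and
# close_stream are hypothetical stand-ins for the Transcribe client calls.
import asyncio

async def pump(queue: asyncio.Queue, open_stream, send_chunk, close_stream):
    stream = None
    while True:
        try:
            frame = await asyncio.wait_for(queue.get(), timeout=10.0)
            if frame is None:        # sentinel: producer asked us to exit
                break
            if stream is None:       # lazy init on first frame
                stream = await open_stream()
            await send_chunk(stream, frame)
        except asyncio.TimeoutError:  # idle: recycle the stream
            if stream is not None:
                await close_stream(stream)
                stream = None
    if stream is not None:
        await close_stream(stream)
# --- End aside ---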
+ ) else: - logger.debug("send_frame: waiting for pcm frame.") + self.ten.log_debug("send_frame: waiting for pcm frame.") except IOError as e: - logger.exception(f"Error in send_frame: {e}") + self.ten.log_error(f"Error in send_frame: {e}") except Exception as e: - logger.exception(f"Error in send_frame: {e}") + self.ten.log_error(f"Error in send_frame: {e}") raise e - logger.info("send_frame: exit due to self.stopped == True") + self.ten.log_info("send_frame: exit due to self.stopped == True") async def transcribe_loop(self) -> None: try: await self.send_frame() except Exception as e: - logger.exception(e) + self.ten.log_error(str(e)) finally: await self.cleanup() @@ -132,7 +142,7 @@ async def get_transcribe_stream(self) -> StartStreamTranscriptionEventStream: def run(self) -> None: self.loop.run_until_complete(self.transcribe_loop()) self.loop.close() - logger.info("async_transcribe_wrapper: thread completed.") + self.ten.log_info("async_transcribe_wrapper: thread completed.") def stop(self) -> None: self.stopped = True @@ -160,6 +170,6 @@ async def handle_transcript_event(self, transcript_event: TranscriptEvent) -> No if not text_result: return - logger.info(f"got transcript: [{text_result}], is_final: [{is_final}]") + self.ten.log_info(f"got transcript: [{text_result}], is_final: [{is_final}]") create_and_send_data(ten=self.ten, text_result=text_result, is_final=is_final) diff --git a/agents/ten_packages/extension/tsdb_firestore/BUILD.gn b/agents/ten_packages/extension/tsdb_firestore/BUILD.gn index 66830a25..a71d1642 100644 --- a/agents/ten_packages/extension/tsdb_firestore/BUILD.gn +++ b/agents/ten_packages/extension/tsdb_firestore/BUILD.gn @@ -14,7 +14,6 @@ ten_package("tsdb_firestore") { "__init__.py", "addon.py", "extension.py", - "log.py", "manifest.json", "property.json", ] diff --git a/agents/ten_packages/extension/tsdb_firestore/__init__.py b/agents/ten_packages/extension/tsdb_firestore/__init__.py index 0f296203..8cd75dde 100644 --- a/agents/ten_packages/extension/tsdb_firestore/__init__.py +++ b/agents/ten_packages/extension/tsdb_firestore/__init__.py @@ -6,6 +6,3 @@ # # from . 
import addon -from .log import logger - -logger.info("tsdb_firestore extension loaded") diff --git a/agents/ten_packages/extension/tsdb_firestore/addon.py b/agents/ten_packages/extension/tsdb_firestore/addon.py index e994e72b..e1ad56ce 100644 --- a/agents/ten_packages/extension/tsdb_firestore/addon.py +++ b/agents/ten_packages/extension/tsdb_firestore/addon.py @@ -17,6 +17,5 @@ class TSDBFirestoreExtensionAddon(Addon): def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: from .extension import TSDBFirestoreExtension - from .log import logger - logger.info("TSDBFirestoreExtensionAddon on_create_instance") + ten_env.log_info("TSDBFirestoreExtensionAddon on_create_instance") ten_env.on_create_instance_done(TSDBFirestoreExtension(name), context) diff --git a/agents/ten_packages/extension/tsdb_firestore/extension.py b/agents/ten_packages/extension/tsdb_firestore/extension.py index 1d58fcfe..3ff50652 100644 --- a/agents/ten_packages/extension/tsdb_firestore/extension.py +++ b/agents/ten_packages/extension/tsdb_firestore/extension.py @@ -24,7 +24,6 @@ import queue import threading import json -from .log import logger from typing import List, Any DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL = "is_final" @@ -45,7 +44,8 @@ CONTENT_TS_PATH = "ts" CONTENT_STREAM_ID_PATH = "stream_id" CONTENT_INPUT_PATH = "input" -DEFAULT_TTL = 1 # days +DEFAULT_TTL = 1 # days + def get_current_time(): # Get the current time @@ -54,6 +54,7 @@ def get_current_time(): unix_microseconds = int(start_time.timestamp() * 1_000_000) return unix_microseconds + def order_by_ts(contents: List[str]) -> List[Any]: tmp = [] for c in contents: @@ -61,18 +62,27 @@ def order_by_ts(contents: List[str]) -> List[Any]: sorted_contents = sorted(tmp, key=lambda x: x[CONTENT_TS_PATH]) res = [] for sc in sorted_contents: - res.append({CONTENT_ROLE_PATH: sc[CONTENT_ROLE_PATH], CONTENT_INPUT_PATH: sc[CONTENT_INPUT_PATH], CONTENT_STREAM_ID_PATH: sc.get(CONTENT_STREAM_ID_PATH, 0)}) + res.append( + { + CONTENT_ROLE_PATH: sc[CONTENT_ROLE_PATH], + CONTENT_INPUT_PATH: sc[CONTENT_INPUT_PATH], + CONTENT_STREAM_ID_PATH: sc.get(CONTENT_STREAM_ID_PATH, 0), + } + ) return res + @firestore.transactional def update_in_transaction(transaction, doc_ref, content): transaction.update(doc_ref, content) + @firestore.transactional def read_in_transaction(transaction, doc_ref): doc = doc_ref.get(transaction=transaction) return doc.to_dict() + class TSDBFirestoreExtension(Extension): def __init__(self, name: str): super().__init__(name) @@ -93,7 +103,7 @@ def __init__(self, name: str): self.cache = "" async def __thread_routine(self, ten_env: TenEnv): - logger.info("__thread_routine start") + ten_env.log_info("__thread_routine start") self.loop = asyncio.get_running_loop() ten_env.on_start_done() await self.stopEvent.wait() @@ -102,28 +112,34 @@ async def stop_thread(self): self.stopEvent.set() def on_init(self, ten_env: TenEnv) -> None: - logger.info("TSDBFirestoreExtension on_init") + ten_env.log_info("TSDBFirestoreExtension on_init") ten_env.on_init_done() def on_start(self, ten_env: TenEnv) -> None: - logger.info("TSDBFirestoreExtension on_start") + ten_env.log_info("TSDBFirestoreExtension on_start") try: self.credentials = ten_env.get_property_to_json(PROPERTY_CREDENTIALS) except Exception as err: - logger.error(f"GetProperty required {PROPERTY_CREDENTIALS} failed, err: {err}") - return - + ten_env.log_error( + f"GetProperty required {PROPERTY_CREDENTIALS} failed, err: {err}" + ) + return + try: self.channel_name = 
ten_env.get_property_string(PROPERTY_CHANNEL_NAME) except Exception as err: - logger.error(f"GetProperty required {PROPERTY_CHANNEL_NAME} failed, err: {err}") - return + ten_env.log_error( + f"GetProperty required {PROPERTY_CHANNEL_NAME} failed, err: {err}" + ) + return try: self.collection_name = ten_env.get_property_string(PROPERTY_COLLECTION_NAME) except Exception as err: - logger.error(f"GetProperty required {PROPERTY_COLLECTION_NAME} failed, err: {err}") + ten_env.log_error( + f"GetProperty required {PROPERTY_COLLECTION_NAME} failed, err: {err}" + ) return # start firestore db @@ -131,27 +147,25 @@ def on_start(self, ten_env: TenEnv) -> None: firebase_admin.initialize_app(cred) self.client = firestore.client() - self.document_ref = self.client.collection(self.collection_name).document(self.channel_name) + self.document_ref = self.client.collection(self.collection_name).document( + self.channel_name + ) # update ttl expiration_time = datetime.datetime.now() + datetime.timedelta(days=self.ttl) exists = self.document_ref.get().exists if exists: - self.document_ref.update( - { - DOC_EXPIRE_PATH: expiration_time - } + self.document_ref.update({DOC_EXPIRE_PATH: expiration_time}) + ten_env.log_info( + f"reset document ttl, {self.ttl} day(s), for the channel {self.channel_name}" ) - logger.info(f"reset document ttl, {self.ttl} day(s), for the channel {self.channel_name}") else: # not exists yet, set to create one - self.document_ref.set( - { - DOC_EXPIRE_PATH: expiration_time - } + self.document_ref.set({DOC_EXPIRE_PATH: expiration_time}) + ten_env.log_info( + f"create new document and set ttl, {self.ttl} day(s), for the channel {self.channel_name}" ) - logger.info(f"create new document and set ttl, {self.ttl} day(s), for the channel {self.channel_name}") - # start the loop to handle data in + # start the loop to handle data in self.thread = threading.Thread(target=self.async_handle, args=[ten_env]) self.thread.start() @@ -166,24 +180,31 @@ def async_handle(self, ten_env: TenEnv) -> None: try: value = self.queue.get() if value is None: - logger.info("exit handle loop") + ten_env.log_info("exit handle loop") break - ts, input, role, stream_id = value - content_str = json.dumps({CONTENT_ROLE_PATH: role, CONTENT_INPUT_PATH: input, CONTENT_TS_PATH: ts, CONTENT_STREAM_ID_PATH: stream_id}) - update_in_transaction( - self.client.transaction(), - self.document_ref, + ts, input_path, role, stream_id = value + content_str = json.dumps( { - DOC_CONTENTS_PATH: firestore.ArrayUnion([content_str]) + CONTENT_ROLE_PATH: role, + CONTENT_INPUT_PATH: input_path, + CONTENT_TS_PATH: ts, + CONTENT_STREAM_ID_PATH: stream_id, } ) - logger.info(f"append {content_str} to firestore document {self.channel_name}") - except Exception as e: - logger.exception("Failed to store chat contents") + update_in_transaction( + self.client.transaction(), + self.document_ref, + {DOC_CONTENTS_PATH: firestore.ArrayUnion([content_str])}, + ) + ten_env.log_info( + f"append {content_str} to firestore document {self.channel_name}" + ) + except Exception: + ten_env.log_error("Failed to store chat contents") def on_stop(self, ten_env: TenEnv) -> None: - logger.info("TSDBFirestoreExtension on_stop") - + ten_env.log_info("TSDBFirestoreExtension on_stop") + # clear the queue and stop the thread to process data in self.stopped = True while not self.queue.empty(): @@ -202,60 +223,62 @@ def on_stop(self, ten_env: TenEnv) -> None: ten_env.on_stop_done() def on_deinit(self, ten_env: TenEnv) -> None: - logger.info("TSDBFirestoreExtension on_deinit") 
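# --- Illustrative aside (not part of the patch) ---
# A minimal sketch of the Firestore pattern used above: one document per
# channel, an expiration timestamp for TTL-based cleanup, and ArrayUnion
# inside a transaction so chat entries are appended atomically. The
# collection and field names here are hypothetical; the real ones come from
# the extension's properties and constants.
import datetime
import json
from firebase_admin import firestore

@firestore.transactional
def append_entry(transaction, doc_ref, entry_json: str):
    transaction.update(doc_ref, {"contents": firestore.ArrayUnion([entry_json])})

def store(client, channel: str, role: str, text: str, ttl_days: int = 1):
    doc_ref = client.collection("chats").document(channel)
    expire_at = datetime.datetime.now() + datetime.timedelta(days=ttl_days)
    # set(..., merge=True) creates the document if missing and refreshes the TTL field
    doc_ref.set({"expire_at": expire_at}, merge=True)
    entry = json.dumps({"role": role, "input": text})
    append_entry(client.transaction(), doc_ref, entry)
# --- End aside ---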
+ ten_env.log_info("TSDBFirestoreExtension on_deinit") ten_env.on_deinit_done() def on_cmd(self, ten_env: TenEnv, cmd: Cmd) -> None: try: cmd_name = cmd.get_name() - logger.info("on_cmd name {}".format(cmd_name)) + ten_env.log_info(f"on_cmd name {cmd_name}") if cmd_name == RETRIEVE_CMD: - asyncio.run_coroutine_threadsafe( - self.retrieve(ten_env, cmd), self.loop - ) + asyncio.run_coroutine_threadsafe(self.retrieve(ten_env, cmd), self.loop) else: - logger.info("unknown cmd name {}".format(cmd_name)) + ten_env.log_info(f"unknown cmd name {cmd_name}") cmd_result = CmdResult.create(StatusCode.ERROR) ten_env.return_result(cmd_result, cmd) - except Exception as e: + except Exception: ten_env.return_result(CmdResult.create(StatusCode.ERROR), cmd) - + async def retrieve(self, ten_env: TenEnv, cmd: Cmd): try: doc_dict = read_in_transaction(self.client.transaction(), self.document_ref) if DOC_CONTENTS_PATH in doc_dict: contents = doc_dict[DOC_CONTENTS_PATH] - logger.info(f"after retrieve {contents}") + ten_env.log_info(f"after retrieve {contents}") ret = CmdResult.create(StatusCode.OK) - ret.set_property_string(CMD_OUT_PROPERTY_RESPONSE, json.dumps(order_by_ts(contents))) + ret.set_property_string( + CMD_OUT_PROPERTY_RESPONSE, json.dumps(order_by_ts(contents)) + ) ten_env.return_result(ret, cmd) else: - logger.info(f"no contents for the channel {self.channel_name} yet") - ten_env.return_result(CmdResult.create(StatusCode.ERROR), cmd) - except Exception as e: - logger.exception(f"Failed to read the document for the channel {self.channel_name}") - ten_env.return_result(CmdResult.create(StatusCode.ERROR), cmd) + ten_env.log_info(f"no contents for the channel {self.channel_name} yet") + ten_env.return_result(CmdResult.create(StatusCode.ERROR), cmd) + except Exception: + ten_env.log_error( + f"Failed to read the document for the channel {self.channel_name}" + ) + ten_env.return_result(CmdResult.create(StatusCode.ERROR), cmd) def on_data(self, ten_env: TenEnv, data: Data) -> None: - logger.info(f"TSDBFirestoreExtension on_data") + ten_env.log_info("TSDBFirestoreExtension on_data") # assume 'data' is an object from which we can get properties is_final = False try: is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) if not is_final: - logger.info("ignore non-final input") + ten_env.log_info("ignore non-final input") return except Exception as err: - logger.info( + ten_env.log_info( f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" ) - + stream_id = 0 try: stream_id = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_STREAM_ID) except Exception as err: - logger.info( + ten_env.log_info( f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_STREAM_ID} failed, err: {err}" ) @@ -263,11 +286,11 @@ def on_data(self, ten_env: TenEnv, data: Data) -> None: try: input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) if not input_text: - logger.info("ignore empty text") + ten_env.log_info("ignore empty text") return - logger.info(f"OnData input text: [{input_text}]") + ten_env.log_info(f"OnData input text: [{input_text}]") except Exception as err: - logger.info( + ten_env.log_info( f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" ) return @@ -275,17 +298,17 @@ def on_data(self, ten_env: TenEnv, data: Data) -> None: try: role = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_ROLE) if not role: - logger.warning("ignore empty role") + ten_env.log_warn("ignore empty role") return except Exception as err: - logger.info( + 
ten_env.log_info( f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_ROLE} failed, err: {err}" ) return ts = get_current_time() self.queue.put((ts, input_text, role, stream_id)) - + def on_audio_frame(self, ten_env: TenEnv, audio_frame: AudioFrame) -> None: pass diff --git a/agents/ten_packages/extension/tsdb_firestore/log.py b/agents/ten_packages/extension/tsdb_firestore/log.py deleted file mode 100644 index aa14bacd..00000000 --- a/agents/ten_packages/extension/tsdb_firestore/log.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# -# Agora Real Time Engagement -# Created by Wei Hu in 2024-08. -# Copyright (c) 2024 Agora IO. All rights reserved. -# -# -import logging - -logger = logging.getLogger("tsdb_firestore") -logger.setLevel(logging.INFO) - -formatter_str = ( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - " - "[%(filename)s:%(lineno)d] - %(message)s" -) -formatter = logging.Formatter(formatter_str) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/tsdb_firestore/manifest.json b/agents/ten_packages/extension/tsdb_firestore/manifest.json index 8c4320f6..5b74ee41 100644 --- a/agents/ten_packages/extension/tsdb_firestore/manifest.json +++ b/agents/ten_packages/extension/tsdb_firestore/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { diff --git a/agents/ten_packages/extension/vision_analyze_tool_python/BUILD.gn b/agents/ten_packages/extension/vision_analyze_tool_python/BUILD.gn index dd6d6380..4d5fcbab 100644 --- a/agents/ten_packages/extension/vision_analyze_tool_python/BUILD.gn +++ b/agents/ten_packages/extension/vision_analyze_tool_python/BUILD.gn @@ -12,7 +12,6 @@ ten_package("vision_analyze_tool_python") { "__init__.py", "addon.py", "extension.py", - "log.py", "manifest.json", "property.json", "tests", diff --git a/agents/ten_packages/extension/vision_analyze_tool_python/__init__.py b/agents/ten_packages/extension/vision_analyze_tool_python/__init__.py index e41a4d08..72593ab2 100644 --- a/agents/ten_packages/extension/vision_analyze_tool_python/__init__.py +++ b/agents/ten_packages/extension/vision_analyze_tool_python/__init__.py @@ -4,6 +4,3 @@ # See the LICENSE file for more information. # from . 
import addon -from .log import logger - -logger.info("vision_analyze_tool_python extension loaded") diff --git a/agents/ten_packages/extension/vision_analyze_tool_python/addon.py b/agents/ten_packages/extension/vision_analyze_tool_python/addon.py index 61617358..1b499d5e 100644 --- a/agents/ten_packages/extension/vision_analyze_tool_python/addon.py +++ b/agents/ten_packages/extension/vision_analyze_tool_python/addon.py @@ -9,12 +9,11 @@ TenEnv, ) from .extension import VisionAnalyzeToolExtension -from .log import logger @register_addon_as_extension("vision_analyze_tool_python") class VisionAnalyzeToolExtensionAddon(Addon): def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: - logger.info("VisionAnalyzeToolExtensionAddon on_create_instance") + ten_env.log_info("VisionAnalyzeToolExtensionAddon on_create_instance") ten_env.on_create_instance_done(VisionAnalyzeToolExtension(name), context) diff --git a/agents/ten_packages/extension/vision_analyze_tool_python/extension.py b/agents/ten_packages/extension/vision_analyze_tool_python/extension.py index 73b6ad51..7b565706 100644 --- a/agents/ten_packages/extension/vision_analyze_tool_python/extension.py +++ b/agents/ten_packages/extension/vision_analyze_tool_python/extension.py @@ -7,10 +7,8 @@ from ten import ( AudioFrame, VideoFrame, - AsyncExtension, AsyncTenEnv, Cmd, - StatusCode, CmdResult, Data, ) @@ -19,8 +17,14 @@ from base64 import b64encode from ten_ai_base.const import CMD_CHAT_COMPLETION_CALL -from ten_ai_base.llm_tool import AsyncLLMToolBaseExtension -from ten_ai_base.types import LLMChatCompletionUserMessageParam, LLMToolMetadata, LLMToolMetadataParameter, LLMToolResult +from ten_ai_base import AsyncLLMToolBaseExtension +from ten_ai_base.types import ( + LLMChatCompletionUserMessageParam, + LLMToolMetadata, + LLMToolMetadataParameter, + LLMToolResult, +) + def rgb2base64jpeg(rgb_data, width, height): # Convert the RGB image to a PIL Image @@ -79,6 +83,7 @@ def resize_image_keep_aspect(image, max_size=512): return resized_image + class VisionAnalyzeToolExtension(AsyncLLMToolBaseExtension): image_data = None image_width = 0 @@ -95,6 +100,7 @@ async def on_stop(self, ten_env: AsyncTenEnv) -> None: ten_env.log_debug("on_stop") # TODO: clean up resources + async def on_deinit(self, ten_env: AsyncTenEnv) -> None: ten_env.log_debug("on_deinit") @@ -108,17 +114,15 @@ async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: data_name = data.get_name() ten_env.log_debug("on_data name {}".format(data_name)) - # TODO: process data - pass - - async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None: + async def on_audio_frame( + self, ten_env: AsyncTenEnv, audio_frame: AudioFrame + ) -> None: audio_frame_name = audio_frame.get_name() ten_env.log_debug("on_audio_frame name {}".format(audio_frame_name)) - # TODO: process audio frame - pass - - async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None: + async def on_video_frame( + self, ten_env: AsyncTenEnv, video_frame: VideoFrame + ) -> None: video_frame_name = video_frame.get_name() ten_env.log_debug("on_video_frame name {}".format(video_frame_name)) @@ -142,41 +146,34 @@ def get_tool_metadata(self, ten_env: AsyncTenEnv) -> list[LLMToolMetadata]: ), ] - async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult: + async def run_tool( + self, ten_env: AsyncTenEnv, name: str, args: dict + ) -> LLMToolResult | None: if name == "get_vision_chat_completion": if self.image_data is None: -
raise Exception("No image data available") + raise ValueError("No image data available") if "query" not in args: - raise Exception("Failed to get property") + raise ValueError("Failed to get property") query = args["query"] - base64_image = rgb2base64jpeg(self.image_data, self.image_width, self.image_height) + base64_image = rgb2base64jpeg( + self.image_data, self.image_width, self.image_height + ) # return LLMToolResult(message=LLMCompletionArgsMessage(role="user", content=[result])) cmd: Cmd = Cmd.create(CMD_CHAT_COMPLETION_CALL) - message: LLMChatCompletionUserMessageParam = LLMChatCompletionUserMessageParam( - role="user", - content=[ - { - "type": "text", - "text": query - }, - { - "type": "image_url", - "image_url": { - "url": base64_image - } - } - ] + message: LLMChatCompletionUserMessageParam = ( + LLMChatCompletionUserMessageParam( + role="user", + content=[ + {"type": "text", "text": query}, + {"type": "image_url", "image_url": {"url": base64_image}}, + ], + ) ) - cmd.set_property_from_json("arguments", json.dumps({"messages":[message]})) + cmd.set_property_from_json("arguments", json.dumps({"messages": [message]})) ten_env.log_info("send_cmd {}".format(message)) cmd_result: CmdResult = await ten_env.send_cmd(cmd) result = cmd_result.get_property_to_json("response") - return { - "content": [{ - "type": "text", - "text": result - }] - } \ No newline at end of file + return {"content": [{"type": "text", "text": result}]} diff --git a/agents/ten_packages/extension/vision_analyze_tool_python/log.py b/agents/ten_packages/extension/vision_analyze_tool_python/log.py deleted file mode 100644 index 52de2298..00000000 --- a/agents/ten_packages/extension/vision_analyze_tool_python/log.py +++ /dev/null @@ -1,20 +0,0 @@ -# -# This file is part of TEN Framework, an open source project. -# Licensed under the Apache License, Version 2.0. -# See the LICENSE file for more information. 
-# -import logging - -logger = logging.getLogger("vision_analyze_tool_python") -logger.setLevel(logging.INFO) - -formatter_str = ( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - " - "[%(filename)s:%(lineno)d] - %(message)s" -) -formatter = logging.Formatter(formatter_str) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/vision_analyze_tool_python/manifest.json b/agents/ten_packages/extension/vision_analyze_tool_python/manifest.json index 8e9ead2a..e087377a 100644 --- a/agents/ten_packages/extension/vision_analyze_tool_python/manifest.json +++ b/agents/ten_packages/extension/vision_analyze_tool_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "vision_analyze_tool_python", - "version": "0.3.1", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { @@ -42,21 +42,30 @@ { "name": "tool_register", "property": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "string" + "tool": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "required": [ + "name", + "description", + "parameters" + ] } }, - "required": [ - "name", - "description", - "parameters" - ], "result": { "property": { "response": { diff --git a/agents/ten_packages/extension/vision_tool_python/BUILD.gn b/agents/ten_packages/extension/vision_tool_python/BUILD.gn index 284103e8..83e8c5d1 100644 --- a/agents/ten_packages/extension/vision_tool_python/BUILD.gn +++ b/agents/ten_packages/extension/vision_tool_python/BUILD.gn @@ -12,7 +12,6 @@ ten_package("vision_tool_python") { "__init__.py", "addon.py", "extension.py", - "log.py", "manifest.json", "property.json", "tests", diff --git a/agents/ten_packages/extension/vision_tool_python/__init__.py b/agents/ten_packages/extension/vision_tool_python/__init__.py index 56edc718..72593ab2 100644 --- a/agents/ten_packages/extension/vision_tool_python/__init__.py +++ b/agents/ten_packages/extension/vision_tool_python/__init__.py @@ -4,6 +4,3 @@ # See the LICENSE file for more information. # from . 
import addon -from .log import logger - -logger.info("vision_tool_python extension loaded") diff --git a/agents/ten_packages/extension/vision_tool_python/addon.py b/agents/ten_packages/extension/vision_tool_python/addon.py index f7cd01a5..f0aba7e1 100644 --- a/agents/ten_packages/extension/vision_tool_python/addon.py +++ b/agents/ten_packages/extension/vision_tool_python/addon.py @@ -9,12 +9,11 @@ TenEnv, ) from .extension import VisionToolExtension -from .log import logger @register_addon_as_extension("vision_tool_python") class VisionToolExtensionAddon(Addon): def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: - logger.info("VisionToolExtensionAddon on_create_instance") + ten_env.log_info("VisionToolExtensionAddon on_create_instance") ten_env.on_create_instance_done(VisionToolExtension(name), context) diff --git a/agents/ten_packages/extension/vision_tool_python/extension.py b/agents/ten_packages/extension/vision_tool_python/extension.py index a2d1077f..f3cfc957 100644 --- a/agents/ten_packages/extension/vision_tool_python/extension.py +++ b/agents/ten_packages/extension/vision_tool_python/extension.py @@ -3,10 +3,8 @@ # Licensed under the Apache License, Version 2.0. # See the LICENSE file for more information. # -import json -from ten.cmd_result import CmdResult -from ten_ai_base.const import CMD_CHAT_COMPLETION_CALL -from ten_ai_base.llm_tool import AsyncLLMToolBaseExtension, LLMToolMetadata, LLMToolResult +from ten_ai_base import AsyncLLMToolBaseExtension +from ten_ai_base.types import LLMToolMetadata, LLMToolResult from ten import ( AudioFrame, VideoFrame, @@ -18,8 +16,6 @@ from io import BytesIO from base64 import b64encode -from ten_ai_base.types import LLMChatCompletionUserMessageParam, LLMToolMetadataParameter - def rgb2base64jpeg(rgb_data, width, height): # Convert the RGB image to a PIL Image @@ -109,17 +105,15 @@ async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None: data_name = data.get_name() ten_env.log_debug("on_data name {}".format(data_name)) - # TODO: process data - pass - - async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None: + async def on_audio_frame( + self, ten_env: AsyncTenEnv, audio_frame: AudioFrame + ) -> None: audio_frame_name = audio_frame.get_name() ten_env.log_debug("on_audio_frame name {}".format(audio_frame_name)) - # TODO: process audio frame - pass - - async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None: + async def on_video_frame( + self, ten_env: AsyncTenEnv, video_frame: VideoFrame + ) -> None: video_frame_name = video_frame.get_name() ten_env.log_debug("on_video_frame name {}".format(video_frame_name)) @@ -136,19 +130,17 @@ def get_tool_metadata(self, ten_env: AsyncTenEnv) -> list[LLMToolMetadata]: ), ] - async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult: + async def run_tool( + self, ten_env: AsyncTenEnv, name: str, args: dict + ) -> LLMToolResult | None: if name == "get_vision_tool": if self.image_data is None: - raise Exception("No image data available") + raise ValueError("No image data available") base64_image = rgb2base64jpeg( - self.image_data, self.image_width, self.image_height) + self.image_data, self.image_width, self.image_height + ) # return LLMToolResult(message=LLMCompletionArgsMessage(role="user", content=[result])) return { - "content": [{ - "type": "image_url", - "image_url": { - "url": base64_image - } - }] + "content": [{"type": "image_url", "image_url": {"url": base64_image}}] } diff --git 
a/agents/ten_packages/extension/vision_tool_python/log.py b/agents/ten_packages/extension/vision_tool_python/log.py deleted file mode 100644 index 6e4e495f..00000000 --- a/agents/ten_packages/extension/vision_tool_python/log.py +++ /dev/null @@ -1,20 +0,0 @@ -# -# This file is part of TEN Framework, an open source project. -# Licensed under the Apache License, Version 2.0. -# See the LICENSE file for more information. -# -import logging - -logger = logging.getLogger("vision_tool_python") -logger.setLevel(logging.INFO) - -formatter_str = ( - "%(asctime)s - %(name)s - %(levelname)s - %(process)d - " - "[%(filename)s:%(lineno)d] - %(message)s" -) -formatter = logging.Formatter(formatter_str) - -console_handler = logging.StreamHandler() -console_handler.setFormatter(formatter) - -logger.addHandler(console_handler) diff --git a/agents/ten_packages/extension/vision_tool_python/manifest.json b/agents/ten_packages/extension/vision_tool_python/manifest.json index 82c8a516..ae483903 100644 --- a/agents/ten_packages/extension/vision_tool_python/manifest.json +++ b/agents/ten_packages/extension/vision_tool_python/manifest.json @@ -1,12 +1,12 @@ { "type": "extension", "name": "vision_tool_python", - "version": "0.3.0", + "version": "0.1.0", "dependencies": [ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { @@ -42,21 +42,30 @@ { "name": "tool_register", "property": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "string" + "tool": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "required": [ + "name", + "description", + "parameters" + ] } }, - "required": [ - "name", - "description", - "parameters" - ], "result": { "property": { "response": { diff --git a/agents/ten_packages/extension/weatherapi_tool_python/BUILD.gn b/agents/ten_packages/extension/weatherapi_tool_python/BUILD.gn index 15a31a94..2fa4be7b 100644 --- a/agents/ten_packages/extension/weatherapi_tool_python/BUILD.gn +++ b/agents/ten_packages/extension/weatherapi_tool_python/BUILD.gn @@ -14,7 +14,6 @@ ten_package("weatherapi_tool_python") { "__init__.py", "addon.py", "extension.py", - "log.py", "manifest.json", "property.json", ] diff --git a/agents/ten_packages/extension/weatherapi_tool_python/extension.py b/agents/ten_packages/extension/weatherapi_tool_python/extension.py index 8f36098a..d4cf5580 100644 --- a/agents/ten_packages/extension/weatherapi_tool_python/extension.py +++ b/agents/ten_packages/extension/weatherapi_tool_python/extension.py @@ -15,9 +15,8 @@ from ten import Cmd from ten.async_ten_env import AsyncTenEnv -from ten_ai_base.helper import get_properties_string -from ten_ai_base import BaseConfig -from ten_ai_base.llm_tool import AsyncLLMToolBaseExtension +from ten_ai_base.config import BaseConfig +from ten_ai_base import AsyncLLMToolBaseExtension from ten_ai_base.types import LLMToolMetadata, LLMToolMetadataParameter, LLMToolResult CMD_TOOL_REGISTER = "tool_register" @@ -35,10 +34,10 @@ CURRENT_TOOL_PARAMETERS = { "type": "object", "properties": { - "location": { - "type": "string", - "description": "The city and state (use only English) e.g. San Francisco, CA" - } + "location": { + "type": "string", + "description": "The city and state (use only English) e.g. 
San Francisco, CA", + } }, "required": ["location"], } @@ -49,14 +48,14 @@ HISTORY_TOOL_PARAMETERS = { "type": "object", "properties": { - "location": { - "type": "string", - "description": "The city and state (use only English) e.g. San Francisco, CA" - }, + "location": { + "type": "string", + "description": "The city and state (use only English) e.g. San Francisco, CA", + }, "datetime": { - "type": "string", - "description": "The datetime user is referring in date format e.g. 2024-10-09" - } + "type": "string", + "description": "The datetime user is referring in date format e.g. 2024-10-09", + }, }, "required": ["location", "datetime"], } @@ -67,26 +66,28 @@ FORECAST_TOOL_PARAMETERS = { "type": "object", "properties": { - "location": { - "type": "string", - "description": "The city and state (use only English) e.g. San Francisco, CA" - } + "location": { + "type": "string", + "description": "The city and state (use only English) e.g. San Francisco, CA", + } }, "required": ["location"], } PROPERTY_API_KEY = "api_key" # Required + @dataclass class WeatherToolConfig(BaseConfig): api_key: str = "" + class WeatherToolExtension(AsyncLLMToolBaseExtension): def __init__(self, name: str) -> None: super().__init__(name) self.session = None self.ten_env = None - self.config : WeatherToolConfig = None + self.config: WeatherToolConfig = None async def on_init(self, ten_env: AsyncTenEnv) -> None: ten_env.log_debug("on_init") @@ -95,7 +96,7 @@ async def on_init(self, ten_env: AsyncTenEnv) -> None: async def on_start(self, ten_env: AsyncTenEnv) -> None: ten_env.log_debug("on_start") - self.config = WeatherToolConfig.create(ten_env=ten_env) + self.config = await WeatherToolConfig.create_async(ten_env=ten_env) ten_env.log_info(f"config: {self.config}") if self.config.api_key: await super().on_start(ten_env) @@ -162,10 +163,12 @@ def get_tool_metadata(self, ten_env: AsyncTenEnv) -> list[LLMToolMetadata]: required=True, ), ], - ) + ), ] - async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult: + async def run_tool( + self, ten_env: AsyncTenEnv, name: str, args: dict + ) -> LLMToolResult | None: ten_env.log_info(f"run_tool name: {name}, args: {args}") if name == CURRENT_TOOL_NAME: result = await self._get_current_weather(args) @@ -182,7 +185,7 @@ async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMTool async def _get_current_weather(self, args: dict) -> Any: if "location" not in args: - raise Exception("Failed to get property") + raise ValueError("Failed to get property") try: location = args["location"] @@ -202,7 +205,7 @@ async def _get_current_weather(self, args: dict) -> Any: async def _get_past_weather(self, args: dict) -> Any: if "location" not in args or "datetime" not in args: - raise Exception("Failed to get property") + raise ValueError("Failed to get property") location = args["location"] datetime = args["datetime"] @@ -212,14 +215,18 @@ async def _get_past_weather(self, args: dict) -> Any: result = await response.json() # Remove all hourly data - if "forecast" in result and "forecastday" in result["forecast"] and result["forecast"]["forecastday"]: + if ( + "forecast" in result + and "forecastday" in result["forecast"] + and result["forecast"]["forecastday"] + ): result["forecast"]["forecastday"][0].pop("hour", None) return result async def _get_future_weather(self, args: dict) -> Any: if "location" not in args: - raise Exception("Failed to get property") + raise ValueError("Failed to get property") location = args["location"] url = 
f"http://api.weatherapi.com/v1/forecast.json?key={self.config.api_key}&q={location}&days=3&aqi=no&alerts=no" diff --git a/agents/ten_packages/extension/weatherapi_tool_python/manifest.json b/agents/ten_packages/extension/weatherapi_tool_python/manifest.json index 511b30ad..c7124370 100644 --- a/agents/ten_packages/extension/weatherapi_tool_python/manifest.json +++ b/agents/ten_packages/extension/weatherapi_tool_python/manifest.json @@ -6,7 +6,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "package": { @@ -29,21 +29,30 @@ { "name": "tool_register", "property": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "string" + "tool": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "type": "object", + "properties": {} + } + } + }, + "required": [ + "name", + "description", + "parameters" + ] } }, - "required": [ - "name", - "description", - "parameters" - ], "result": { "property": { "response": { diff --git a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/__init__.py b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/__init__.py index 167e6afe..f31a766d 100644 --- a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/__init__.py +++ b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/__init__.py @@ -4,13 +4,19 @@ # See the LICENSE file for more information. # -from .types import LLMCallCompletionArgs, LLMDataCompletionArgs, LLMToolMetadata, LLMToolResult, LLMChatCompletionMessageParam +from .types import ( + LLMCallCompletionArgs, + LLMDataCompletionArgs, + LLMToolMetadata, + LLMToolResult, + LLMChatCompletionMessageParam, +) from .usage import LLMUsage, LLMCompletionTokensDetails, LLMPromptTokensDetails -from .llm import AsyncLLMBaseExtension -from .llm_tool import AsyncLLMToolBaseExtension from .chat_memory import ChatMemory, EVENT_MEMORY_APPENDED, EVENT_MEMORY_EXPIRED from .helper import AsyncQueue, AsyncEventEmitter from .config import BaseConfig +from .llm import AsyncLLMBaseExtension +from .llm_tool import AsyncLLMToolBaseExtension # Specify what should be imported when a user imports * from the # ten_ai_base package. @@ -26,4 +32,9 @@ "AsyncEventEmitter", "BaseConfig", "LLMChatCompletionMessageParam", + "LLMUsage", + "LLMCompletionTokensDetails", + "LLMPromptTokensDetails", + "EVENT_MEMORY_APPENDED", + "EVENT_MEMORY_EXPIRED", ] diff --git a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/config.py b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/config.py index 673a8431..99f2d09d 100644 --- a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/config.py +++ b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/config.py @@ -2,7 +2,7 @@ import json from typing import TypeVar, Type, List -from ten import TenEnv +from ten import AsyncTenEnv, TenEnv from dataclasses import dataclass, fields @@ -22,6 +22,12 @@ def create(cls: Type[T], ten_env: TenEnv) -> T: c._init(ten_env) return c + @classmethod + async def create_async(cls: Type[T], ten_env: AsyncTenEnv) -> T: + c = cls() + await c._init_async(ten_env) + return c + def _init(obj, ten_env: TenEnv): """ Get property from ten_env to initialize the dataclass config. 
@@ -50,3 +56,29 @@ def _init(obj, ten_env: TenEnv): setattr(obj, field.name, json.loads(val)) except Exception as e: pass + + async def _init_async(obj, ten_env: AsyncTenEnv): + """ + Get property from ten_env to initialize the dataclass config. + """ + for field in fields(obj): + try: + match field.type: + case builtins.str: + val = await ten_env.get_property_string(field.name) + if val: + setattr(obj, field.name, val) + case builtins.int: + val = await ten_env.get_property_int(field.name) + setattr(obj, field.name, val) + case builtins.bool: + val = await ten_env.get_property_bool(field.name) + setattr(obj, field.name, val) + case builtins.float: + val = await ten_env.get_property_float(field.name) + setattr(obj, field.name, val) + case _: + val = await ten_env.get_property_to_json(field.name) + setattr(obj, field.name, json.loads(val)) + except Exception as e: + pass \ No newline at end of file diff --git a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/llm.py b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/llm.py index e46b10b8..5ef942d3 100644 --- a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/llm.py +++ b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/llm.py @@ -5,6 +5,7 @@ # from abc import ABC, abstractmethod import asyncio +import traceback from ten import ( AsyncExtension, @@ -13,7 +14,14 @@ from ten.async_ten_env import AsyncTenEnv from ten.cmd import Cmd from ten.cmd_result import CmdResult, StatusCode -from .const import CMD_PROPERTY_TOOL, CMD_TOOL_REGISTER, DATA_OUT_NAME, DATA_OUT_PROPERTY_END_OF_SEGMENT, DATA_OUT_PROPERTY_TEXT, CMD_CHAT_COMPLETION_CALL +from .const import ( + CMD_PROPERTY_TOOL, + CMD_TOOL_REGISTER, + DATA_OUT_NAME, + DATA_OUT_PROPERTY_END_OF_SEGMENT, + DATA_OUT_PROPERTY_TEXT, + CMD_CHAT_COMPLETION_CALL, +) from .types import LLMCallCompletionArgs, LLMDataCompletionArgs, LLMToolMetadata from .helper import AsyncQueue import json @@ -28,6 +36,7 @@ class AsyncLLMBaseExtension(AsyncExtension, ABC): Use flush_input_items to flush the queue and cancel the current task. Override on_call_chat_completion and on_data_chat_completion to implement the chat completion logic. 
""" + # Create the queue for message processing def __init__(self, name: str): @@ -38,23 +47,24 @@ def __init__(self, name: str): self.current_task = None self.hit_default_cmd = False self.loop_task = None + self.loop = None - async def on_init(self, ten_env: AsyncTenEnv) -> None: - await super().on_init(ten_env) + async def on_init(self, async_ten_env: AsyncTenEnv) -> None: + await super().on_init(async_ten_env) - async def on_start(self, ten_env: AsyncTenEnv) -> None: - await super().on_start(ten_env) + async def on_start(self, async_ten_env: AsyncTenEnv) -> None: + await super().on_start(async_ten_env) if self.loop_task is None: self.loop = asyncio.get_event_loop() - self.loop_task = self.loop.create_task(self._process_queue(ten_env)) + self.loop_task = self.loop.create_task(self._process_queue(async_ten_env)) - async def on_stop(self, ten_env: AsyncTenEnv) -> None: - await super().on_stop(ten_env) + async def on_stop(self, async_ten_env: AsyncTenEnv) -> None: + await super().on_stop(async_ten_env) await self.queue.put(None) - async def on_deinit(self, ten_env: AsyncTenEnv) -> None: - await super().on_deinit(ten_env) + async def on_deinit(self, async_ten_env: AsyncTenEnv) -> None: + await super().on_deinit(async_ten_env) async def on_cmd(self, async_ten_env: AsyncTenEnv, cmd: Cmd) -> None: """ @@ -65,94 +75,96 @@ async def on_cmd(self, async_ten_env: AsyncTenEnv, cmd: Cmd) -> None: async_ten_env.log_debug(f"on_cmd name {cmd_name}") if cmd_name == CMD_TOOL_REGISTER: try: - tool_metadata_json = json.loads( - cmd.get_property_to_json(CMD_PROPERTY_TOOL)) + tool_metadata_json = cmd.get_property_to_json(CMD_PROPERTY_TOOL) async_ten_env.log_info(f"register tool: {tool_metadata_json}") - tool_metadata = LLMToolMetadata.model_validate_json( - tool_metadata_json) + tool_metadata = LLMToolMetadata.model_validate_json(tool_metadata_json) async with self.available_tools_lock: self.available_tools.append(tool_metadata) await self.on_tools_update(async_ten_env, tool_metadata) - async_ten_env.return_result( - CmdResult.create(StatusCode.OK), cmd) + await async_ten_env.return_result(CmdResult.create(StatusCode.OK), cmd) except Exception as err: - async_ten_env.log_warn(f"on_cmd failed: {err}") - async_ten_env.return_result( - CmdResult.create(StatusCode.ERROR), cmd) + async_ten_env.log_warn(f"on_cmd failed: {traceback.format_exc()}") + await async_ten_env.return_result( + CmdResult.create(StatusCode.ERROR), cmd + ) elif cmd_name == CMD_CHAT_COMPLETION_CALL: try: args = json.loads(cmd.get_property_to_json("arguments")) response = await self.on_call_chat_completion(async_ten_env, **args) cmd_result = CmdResult.create(StatusCode.OK) - cmd_result.set_property_from_json( - "response", response) - async_ten_env.return_result(cmd_result, cmd) + cmd_result.set_property_from_json("response", response) + await async_ten_env.return_result(cmd_result, cmd) except Exception as err: async_ten_env.log_warn(f"on_cmd failed: {err}") - async_ten_env.return_result( - CmdResult.create(StatusCode.ERROR), cmd) + await async_ten_env.return_result( + CmdResult.create(StatusCode.ERROR), cmd + ) - - async def queue_input_item(self, prepend: bool = False, **kargs: LLMDataCompletionArgs): + async def queue_input_item( + self, prepend: bool = False, **kargs: LLMDataCompletionArgs + ): """Queues an input item for processing.""" await self.queue.put(kargs, prepend) - async def flush_input_items(self, ten_env: AsyncTenEnv): + async def flush_input_items(self, async_ten_env: AsyncTenEnv): """Flushes the self.queue and cancels the 
         current task."""
         # Flush the queue using the new flush method
         await self.queue.flush()

         # Cancel the current task if one is running
         if self.current_task:
-            ten_env.log_info("Cancelling the current task during flush.")
+            async_ten_env.log_info("Cancelling the current task during flush.")
             self.current_task.cancel()

-    def send_text_output(self, ten_env: AsyncTenEnv, sentence: str, end_of_segment: bool):
+    def send_text_output(
+        self, async_ten_env: AsyncTenEnv, sentence: str, end_of_segment: bool
+    ):
         try:
             output_data = Data.create(DATA_OUT_NAME)
-            output_data.set_property_string(
-                DATA_OUT_PROPERTY_TEXT, sentence)
+            output_data.set_property_string(DATA_OUT_PROPERTY_TEXT, sentence)
             output_data.set_property_bool(
                 DATA_OUT_PROPERTY_END_OF_SEGMENT, end_of_segment
             )
-            ten_env.send_data(output_data)
-            ten_env.log_info(
+            asyncio.create_task(async_ten_env.send_data(output_data))
+            async_ten_env.log_info(
                 f"{'end of segment ' if end_of_segment else ''}sent sentence [{sentence}]"
             )
         except Exception as err:
-            ten_env.log_warn(
-                f"send sentence [{sentence}] failed, err: {err}"
-            )
+            async_ten_env.log_warn(f"send sentence [{sentence}] failed, err: {err}")

     @abstractmethod
-    async def on_call_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMCallCompletionArgs) -> any:
+    async def on_call_chat_completion(
+        self, async_ten_env: AsyncTenEnv, **kargs: LLMCallCompletionArgs
+    ) -> any:
         """Called when a chat completion is requested by cmd call. Implement this method to process the chat completion."""
-        pass

     @abstractmethod
-    async def on_data_chat_completion(self, ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs) -> None:
+    async def on_data_chat_completion(
+        self, async_ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs
+    ) -> None:
         """
         Called when a chat completion is requested by data input. Implement this method to process the chat completion.
         Note that this method is stream-based, and it should consider supporting local context caching.
         """
-        pass

     @abstractmethod
-    async def on_tools_update(self, ten_env: AsyncTenEnv, tool: LLMToolMetadata) -> None:
+    async def on_tools_update(
+        self, async_ten_env: AsyncTenEnv, tool: LLMToolMetadata
+    ) -> None:
         """Called when a new tool is registered.
         Implement this method to process the new tool."""
-        pass

-    async def _process_queue(self, ten_env: AsyncTenEnv):
+    async def _process_queue(self, async_ten_env: AsyncTenEnv):
         """Asynchronously process queue items one by one."""
         while True:
             # Wait for an item to be available in the queue
             args = await self.queue.get()
             try:
-                ten_env.log_info(f"Processing queue item: {args}")
+                async_ten_env.log_info(f"Processing queue item: {args}")
                 self.current_task = asyncio.create_task(
-                    self.on_data_chat_completion(ten_env, **args))
+                    self.on_data_chat_completion(async_ten_env, **args)
+                )
                 await self.current_task  # Wait for the current task to finish or be cancelled
             except asyncio.CancelledError:
-                ten_env.log_info(f"Task cancelled: {args}")
+                async_ten_env.log_info(f"Task cancelled: {args}")
             except Exception as err:
-                ten_env.log_error(f"Task failed: {args}, err: {err}")
+                async_ten_env.log_error(f"Task failed: {args}, err: {err}")
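The llm.py changes above are mostly mechanical (black reflow, `ten_env` renamed to `async_ten_env`) plus one behavioral fix: `return_result` is now awaited, and `send_text_output` wraps the now-async `send_data` in `asyncio.create_task` so synchronous callers keep working, at the cost of backpressure and in-line error reporting (errors surface only inside the task). Two nits: the `-> any` annotation on `on_call_chat_completion` refers to the builtin function, where `typing.Any` is presumably intended, and the newly imported `traceback` is used only in the tool-register branch while the other handlers still log a bare `{err}`. A minimal subclass sketch follows; the `EchoLLMExtension` name and its canned replies are hypothetical, not part of this patch:

```python
class EchoLLMExtension(AsyncLLMBaseExtension):
    async def on_call_chat_completion(self, async_ten_env, **kargs):
        # cmd-style completion: the returned JSON lands in the "response"
        # property of the CmdResult built by on_cmd above
        return json.dumps({"text": "hello"})

    async def on_data_chat_completion(self, async_ten_env, **kargs) -> None:
        # data-style completion: runs inside _process_queue's current_task,
        # so a flush_input_items call can cancel it mid-stream
        sentences = ["Hello,", "world."]
        for i, sentence in enumerate(sentences):
            self.send_text_output(
                async_ten_env, sentence, end_of_segment=(i == len(sentences) - 1)
            )

    async def on_tools_update(self, async_ten_env, tool) -> None:
        async_ten_env.log_info(f"tool registered: {tool}")
```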
diff --git a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/llm_tool.py b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/llm_tool.py
index 4c6f6b07..c1232660 100644
--- a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/llm_tool.py
+++ b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/llm_tool.py
@@ -1,5 +1,4 @@
 from abc import ABC, abstractmethod
-from asyncio import sleep
 import asyncio
 import traceback
 from ten import (
@@ -13,77 +12,86 @@
 from ten.cmd_result import CmdResult, StatusCode
 from ten.video_frame import VideoFrame
 from .types import LLMToolMetadata, LLMToolResult
-from .const import CMD_TOOL_REGISTER, CMD_TOOL_CALL, CMD_PROPERTY_TOOL, CMD_PROPERTY_RESULT
+from .const import (
+    CMD_TOOL_REGISTER,
+    CMD_TOOL_CALL,
+    CMD_PROPERTY_TOOL,
+    CMD_PROPERTY_RESULT,
+)
 import json


 class AsyncLLMToolBaseExtension(AsyncExtension, ABC):

-    async def on_start(self, ten_env: AsyncTenEnv) -> None:
-        await super().on_start(ten_env)
+    async def on_start(self, async_ten_env: AsyncTenEnv) -> None:
+        await super().on_start(async_ten_env)

-        tools:list[LLMToolMetadata] = self.get_tool_metadata(ten_env)
+        tools: list[LLMToolMetadata] = self.get_tool_metadata(async_ten_env)
         for tool in tools:
-            ten_env.log_info(f"tool: {tool}")
+            async_ten_env.log_info(f"tool: {tool}")
             c: Cmd = Cmd.create(CMD_TOOL_REGISTER)
-            c.set_property_from_json(
-                CMD_PROPERTY_TOOL, json.dumps(tool.model_dump_json()))
-            ten_env.log_info(f"begin tool register, {tool}")
-            await ten_env.send_cmd(c)
-            ten_env.log_info(f"tool registered, {tool}")
+            c.set_property_from_json(CMD_PROPERTY_TOOL, json.dumps(tool.model_dump()))
+            async_ten_env.log_info(f"begin tool register, {tool}")
+            await async_ten_env.send_cmd(c)
+            async_ten_env.log_info(f"tool registered, {tool}")

-    async def on_stop(self, ten_env: AsyncTenEnv) -> None:
-        await super().on_stop(ten_env)
+    async def on_stop(self, async_ten_env: AsyncTenEnv) -> None:
+        await super().on_stop(async_ten_env)

-    async def on_cmd(self, ten_env: AsyncTenEnv, cmd: Cmd) -> None:
+    async def on_cmd(self, async_ten_env: AsyncTenEnv, cmd: Cmd) -> None:
         cmd_name = cmd.get_name()
-        ten_env.log_debug("on_cmd name {}".format(cmd_name))
+        async_ten_env.log_debug("on_cmd name {}".format(cmd_name))

         if cmd_name == CMD_TOOL_CALL:
             try:
                 tool_name = cmd.get_property_string("name")
                 tool_args = json.loads(cmd.get_property_to_json("arguments"))
-                ten_env.log_debug(
-                    f"tool_name: {tool_name}, tool_args: {tool_args}")
-                result = await asyncio.create_task(self.run_tool(ten_env, tool_name, tool_args))
+                async_ten_env.log_debug(
+                    f"tool_name: {tool_name}, tool_args: {tool_args}"
+                )
+                result = await asyncio.create_task(
+                    self.run_tool(async_ten_env, tool_name, tool_args)
+                )

                 if result is None:
-                    ten_env.return_result(CmdResult.create(StatusCode.OK), cmd)
+                    await async_ten_env.return_result(
+                        CmdResult.create(StatusCode.OK), cmd
+                    )
                     return

                 cmd_result: CmdResult = CmdResult.create(StatusCode.OK)
                 cmd_result.set_property_from_json(
-                    CMD_PROPERTY_RESULT, json.dumps(result))
-                ten_env.return_result(cmd_result, cmd)
-                ten_env.log_info(f"tool result done, {result}")
-            except Exception as err:
-                ten_env.log_warn(f"on_cmd failed: {traceback.format_exc()}")
-                ten_env.return_result(CmdResult.create(StatusCode.ERROR), cmd)
-
-    async def on_data(self, ten_env: AsyncTenEnv, data: Data) -> None:
+                    CMD_PROPERTY_RESULT, json.dumps(result)
+                )
+                await async_ten_env.return_result(cmd_result, cmd)
+                async_ten_env.log_info(f"tool result done, {result}")
+            except Exception:
+                async_ten_env.log_warn(f"on_cmd failed: {traceback.format_exc()}")
+                await async_ten_env.return_result(
+                    CmdResult.create(StatusCode.ERROR), cmd
+                )
+
+    async def on_data(self, async_ten_env: AsyncTenEnv, data: Data) -> None:
         data_name = data.get_name()
-        ten_env.log_debug("on_data name {}".format(data_name))
-
-        # TODO: process data
-        pass
+        async_ten_env.log_debug(f"on_data name {data_name}")

-    async def on_audio_frame(self, ten_env: AsyncTenEnv, audio_frame: AudioFrame) -> None:
+    async def on_audio_frame(
+        self, async_ten_env: AsyncTenEnv, audio_frame: AudioFrame
+    ) -> None:
         audio_frame_name = audio_frame.get_name()
-        ten_env.log_debug("on_audio_frame name {}".format(audio_frame_name))
+        async_ten_env.log_debug("on_audio_frame name {}".format(audio_frame_name))

-        # TODO: process audio frame
-        pass
-
-    async def on_video_frame(self, ten_env: AsyncTenEnv, video_frame: VideoFrame) -> None:
+    async def on_video_frame(
+        self, async_ten_env: AsyncTenEnv, video_frame: VideoFrame
+    ) -> None:
         video_frame_name = video_frame.get_name()
-        ten_env.log_debug("on_video_frame name {}".format(video_frame_name))
-
-        # TODO: process video frame
-        pass
+        async_ten_env.log_debug("on_video_frame name {}".format(video_frame_name))

     @abstractmethod
     def get_tool_metadata(self, ten_env: TenEnv) -> list[LLMToolMetadata]:
         pass

     @abstractmethod
-    async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult:
+    async def run_tool(
+        self, ten_env: AsyncTenEnv, name: str, args: dict
+    ) -> LLMToolResult | None:
        pass
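Beyond the renames and awaited `return_result` calls, the registration payload fix in llm_tool.py is easy to miss: `json.dumps(tool.model_dump_json())` double-encoded the metadata (`model_dump_json()` already returns a JSON string, so `json.dumps` wrapped it in another layer of quoting), which the receiving side's `model_validate_json` in llm.py could not parse as an object. `json.dumps(tool.model_dump())` serializes the dict exactly once. A sketch of a concrete tool built on this base class; the tool name, the `parameters` list (suggested by `LLMToolMetadataParameter` in types.py but not shown in this hunk), and the canned result are all hypothetical:

```python
class WeatherToolExtension(AsyncLLMToolBaseExtension):
    def get_tool_metadata(self, ten_env: TenEnv) -> list[LLMToolMetadata]:
        # Registered once in on_start via a CMD_TOOL_REGISTER cmd per tool
        return [
            LLMToolMetadata(
                name="get_weather",
                description="Look up the current weather for a city",
                parameters=[
                    LLMToolMetadataParameter(
                        name="city",
                        type="string",
                        description="City to query",
                        required=True,
                    )
                ],
            )
        ]

    async def run_tool(self, ten_env, name: str, args: dict) -> LLMToolResult | None:
        if name == "get_weather":
            # The dict result is json.dumps'ed into CMD_PROPERTY_RESULT by on_cmd
            return {"content": f"It is sunny in {args['city']}"}
        return None  # None now maps to a bare OK result, per the widened return type
```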
""" + # Create the queue for message processing def __init__(self, name: str): @@ -35,7 +41,7 @@ def __init__(self, name: str): self.queue = AsyncQueue() self.current_task = None self.loop_task = None - self.leftover_bytes = b'' + self.leftover_bytes = b"" async def on_init(self, ten_env: AsyncTenEnv) -> None: await super().on_init(ten_env) @@ -66,7 +72,7 @@ async def on_cmd(self, async_ten_env: AsyncTenEnv, cmd: Cmd) -> None: status_code, detail = StatusCode.OK, "success" cmd_result = CmdResult.create(status_code) cmd_result.set_property_string("detail", detail) - async_ten_env.return_result(cmd_result, cmd) + await async_ten_env.return_result(cmd_result, cmd) async def on_data(self, async_ten_env: AsyncTenEnv, data: Data) -> None: # Get the necessary properties @@ -91,7 +97,9 @@ async def flush_input_items(self, ten_env: AsyncTenEnv): ten_env.log_info("Cancelling the current task during flush.") self.current_task.cancel() - def send_audio_out(self, ten_env: AsyncTenEnv, audio_data: bytes, **args: TTSPcmOptions) -> None: + async def send_audio_out( + self, ten_env: AsyncTenEnv, audio_data: bytes, **args: TTSPcmOptions + ) -> None: """End sending audio out.""" sample_rate = args.get("sample_rate", 16000) bytes_per_sample = args.get("bytes_per_sample", 2) @@ -103,11 +111,13 @@ def send_audio_out(self, ten_env: AsyncTenEnv, audio_data: bytes, **args: TTSPcm # Check if combined_data length is odd if len(combined_data) % (bytes_per_sample * number_of_channels) != 0: # Save the last incomplete frame - valid_length = len(combined_data) - (len(combined_data) % (bytes_per_sample * number_of_channels)) + valid_length = len(combined_data) - ( + len(combined_data) % (bytes_per_sample * number_of_channels) + ) self.leftover_bytes = combined_data[valid_length:] combined_data = combined_data[:valid_length] else: - self.leftover_bytes = b'' + self.leftover_bytes = b"" if combined_data: f = AudioFrame.create("pcm_frame") @@ -115,17 +125,21 @@ def send_audio_out(self, ten_env: AsyncTenEnv, audio_data: bytes, **args: TTSPcm f.set_bytes_per_sample(bytes_per_sample) f.set_number_of_channels(number_of_channels) f.set_data_fmt(AudioFrameDataFmt.INTERLEAVE) - f.set_samples_per_channel(len(combined_data) // (bytes_per_sample * number_of_channels)) + f.set_samples_per_channel( + len(combined_data) // (bytes_per_sample * number_of_channels) + ) f.alloc_buf(len(combined_data)) buff = f.lock_buf() buff[:] = combined_data f.unlock_buf(buff) - ten_env.send_audio_frame(f) + await ten_env.send_audio_frame(f) except Exception as e: ten_env.log_error(f"error send audio frame, {traceback.format_exc()}") @abstractmethod - async def on_request_tts(self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool) -> None: + async def on_request_tts( + self, ten_env: AsyncTenEnv, input_text: str, end_of_segment: bool + ) -> None: """ Called when a new input item is available in the queue. Override this method to implement the TTS request logic. Use send_audio_out to send the audio data to the output when the audio data is ready. 
@@ -137,7 +151,6 @@ async def on_cancel_tts(self, ten_env: AsyncTenEnv) -> None:
         """Called when the TTS request is cancelled."""
         pass

-
     async def _process_queue(self, ten_env: AsyncTenEnv):
         """Asynchronously process queue items one by one."""
         while True:
@@ -146,7 +159,8 @@ async def _process_queue(self, ten_env: AsyncTenEnv):

             try:
                 self.current_task = asyncio.create_task(
-                    self.on_request_tts(ten_env, text, end_of_segment))
+                    self.on_request_tts(ten_env, text, end_of_segment)
+                )
                 await self.current_task  # Wait for the current task to finish or be cancelled
             except asyncio.CancelledError:
                 ten_env.log_info(f"Task cancelled: {text}")
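Making `send_audio_out` a coroutine lets tts.py await `send_audio_frame` directly instead of dropping the returned coroutine, the same unawaited-call bug fixed throughout this diff. The frame-alignment logic is worth spelling out: a frame may only contain whole samples, so the chunk is trimmed to a multiple of `bytes_per_sample * number_of_channels`, the remainder is carried in `self.leftover_bytes` for the next call, and `samples_per_channel` is the trimmed length divided by that sample size. A worked example with illustrative numbers:

```python
# 16-bit mono PCM, so a whole sample is bytes_per_sample * channels = 2 bytes.
bytes_per_sample, channels = 2, 1
chunk = b"\x00" * 3001                             # odd-sized chunk from a TTS engine
frame_bytes = bytes_per_sample * channels
valid = len(chunk) - (len(chunk) % frame_bytes)    # 3000 bytes are frame-aligned
leftover = chunk[valid:]                           # 1 byte carried to the next call
assert valid // frame_bytes == 1500                # samples_per_channel
assert len(leftover) == 1
```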
""" + LLMChatCompletionMessageParam: TypeAlias = Union[ LLMChatCompletionUserMessageParam, LLMChatCompletionToolMessageParam ] + class LLMToolResult(TypedDict, total=False): content: Required[Union[str, Iterable[LLMChatCompletionContentPartParam]]] + class LLMCallCompletionArgs(TypedDict, total=False): messages: Iterable[LLMChatCompletionMessageParam] + class LLMDataCompletionArgs(TypedDict, total=False): messages: Iterable[LLMChatCompletionMessageParam] - no_tool: bool = False + no_tool: bool class TTSPcmOptions(TypedDict, total=False): @@ -104,4 +117,4 @@ class TTSPcmOptions(TypedDict, total=False): """The number of audio channels.""" bytes_per_sample: int - """The number of bytes per sample.""" \ No newline at end of file + """The number of bytes per sample.""" diff --git a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/usage.py b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/usage.py index 2e555b1c..fdfe0872 100644 --- a/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/usage.py +++ b/agents/ten_packages/system/ten_ai_base/interface/ten_ai_base/usage.py @@ -1,14 +1,18 @@ from pydantic import BaseModel + class LLMCompletionTokensDetails(BaseModel): accepted_prediction_tokens: int = 0 audio_tokens: int = 0 reasoning_tokens: int = 0 rejected_prediction_tokens: int = 0 + class LLMPromptTokensDetails(BaseModel): audio_tokens: int = 0 cached_tokens: int = 0 + text_tokens: int = 0 + class LLMUsage(BaseModel): completion_tokens: int = 0 @@ -16,4 +20,4 @@ class LLMUsage(BaseModel): total_tokens: int = 0 completion_tokens_details: LLMCompletionTokensDetails | None = None - prompt_tokens_details: LLMPromptTokensDetails | None = None \ No newline at end of file + prompt_tokens_details: LLMPromptTokensDetails | None = None diff --git a/agents/ten_packages/system/ten_ai_base/manifest.json b/agents/ten_packages/system/ten_ai_base/manifest.json index 6e7b5561..9da9cd71 100644 --- a/agents/ten_packages/system/ten_ai_base/manifest.json +++ b/agents/ten_packages/system/ten_ai_base/manifest.json @@ -13,7 +13,7 @@ { "type": "system", "name": "ten_runtime_python", - "version": "0.4" + "version": "0.6" } ], "api": {} diff --git a/playground/src/middleware.tsx b/playground/src/middleware.tsx index 48d74439..750cab35 100644 --- a/playground/src/middleware.tsx +++ b/playground/src/middleware.tsx @@ -55,7 +55,7 @@ export async function middleware(req: NextRequest) { return NextResponse.rewrite(url); } - url.href = `${TEN_DEV_SERVER_URL}${pathname.replace('/api/dev/', '/api/dev-server/')}`; + url.href = `${TEN_DEV_SERVER_URL}${pathname.replace('/api/dev/', '/api/designer/')}`; // console.log(`Rewriting request to ${url.href}`); return NextResponse.rewrite(url); diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 00000000..f8458b4f --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,36 @@ +{ + "typeCheckingMode": "basic", + "reportUnusedCoroutine": "error", + "reportMissingAwait": "error", + "reportUnawaitedAsyncFunctions": "error", + "reportArgumentType": false, + "reportMissingParameterType": false, + "reportAssignmentType": false, + "reportAttributeAccessIssue": false, + "reportOptionalMemberAccess": false, + "reportOptionalContextManager": false, + "reportCallIssue": false, + "reportGeneralTypeIssues": "warning", + "reportReturnType": false, + "reportUnboundVariable": false, + "reportOptionalSubscript": false, + "reportOptionalIterable": false, + "reportOperatorIssue": false, + "exclude": [ + "venv", + "__pycache__", + 
"./agents/ten_packages/system/ten_runtime_python/**/*", + ], + "include": [ + "agents" + ], + "executionEnvironments": [ + { + "root": "./agents", + "extraPaths": [ + "./ten_packages/system/ten_runtime_python/interface", + "./ten_packages/system/ten_ai_base/interface" + ] + } + ] +} \ No newline at end of file