From af777ffcecc7c1e1c54c9130508fdd5671050f26 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 9 Jul 2024 08:07:46 +0000 Subject: [PATCH 01/72] feat: remove golang --- .../extension/chat_transcriber/extension.go | 147 ------ .../addon/extension/chat_transcriber/go.mod | 10 - .../addon/extension/chat_transcriber/go.sum | 4 - .../extension/chat_transcriber/manifest.json | 45 -- .../chat_transcriber/pb/chat_text.pb.go | 475 ------------------ .../chat_transcriber/pb/chat_text.proto | 37 -- .../extension/chat_transcriber/property.json | 1 - .../elevenlabs_tts/elevenlabs_tts.go | 82 --- .../elevenlabs_tts_extension.go | 340 ------------- agents/addon/extension/elevenlabs_tts/go.mod | 10 - agents/addon/extension/elevenlabs_tts/go.sum | 2 - .../extension/elevenlabs_tts/manifest.json | 74 --- agents/addon/extension/elevenlabs_tts/pcm.go | 104 ---- .../extension/elevenlabs_tts/property.json | 1 - .../extension/interrupt_detector/extension.go | 78 --- .../addon/extension/interrupt_detector/go.mod | 7 - .../interrupt_detector/manifest.json | 38 -- .../interrupt_detector/property.json | 1 - .../addon/extension/openai_chatgpt/README.md | 0 agents/addon/extension/openai_chatgpt/go.mod | 17 - agents/addon/extension/openai_chatgpt/go.sum | 12 - .../extension/openai_chatgpt/manifest.json | 83 --- .../openai_chatgpt/openai_chatgpt.go | 111 ---- .../openai_chatgpt_extension.go | 391 -------------- .../extension/openai_chatgpt/property.json | 1 - .../extension/openai_chatgpt/sentence.go | 30 -- .../extension/openai_chatgpt/sentence_test.go | 150 ------ 27 files changed, 2251 deletions(-) delete mode 100644 agents/addon/extension/chat_transcriber/extension.go delete mode 100644 agents/addon/extension/chat_transcriber/go.mod delete mode 100644 agents/addon/extension/chat_transcriber/go.sum delete mode 100644 agents/addon/extension/chat_transcriber/manifest.json delete mode 100644 agents/addon/extension/chat_transcriber/pb/chat_text.pb.go delete mode 100644 agents/addon/extension/chat_transcriber/pb/chat_text.proto delete mode 100644 agents/addon/extension/chat_transcriber/property.json delete mode 100644 agents/addon/extension/elevenlabs_tts/elevenlabs_tts.go delete mode 100644 agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go delete mode 100644 agents/addon/extension/elevenlabs_tts/go.mod delete mode 100644 agents/addon/extension/elevenlabs_tts/go.sum delete mode 100644 agents/addon/extension/elevenlabs_tts/manifest.json delete mode 100644 agents/addon/extension/elevenlabs_tts/pcm.go delete mode 100644 agents/addon/extension/elevenlabs_tts/property.json delete mode 100644 agents/addon/extension/interrupt_detector/extension.go delete mode 100644 agents/addon/extension/interrupt_detector/go.mod delete mode 100644 agents/addon/extension/interrupt_detector/manifest.json delete mode 100644 agents/addon/extension/interrupt_detector/property.json delete mode 100644 agents/addon/extension/openai_chatgpt/README.md delete mode 100644 agents/addon/extension/openai_chatgpt/go.mod delete mode 100644 agents/addon/extension/openai_chatgpt/go.sum delete mode 100644 agents/addon/extension/openai_chatgpt/manifest.json delete mode 100644 agents/addon/extension/openai_chatgpt/openai_chatgpt.go delete mode 100644 agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go delete mode 100644 agents/addon/extension/openai_chatgpt/property.json delete mode 100644 agents/addon/extension/openai_chatgpt/sentence.go delete mode 100644 agents/addon/extension/openai_chatgpt/sentence_test.go diff --git 
a/agents/addon/extension/chat_transcriber/extension.go b/agents/addon/extension/chat_transcriber/extension.go deleted file mode 100644 index 33fba48ab..000000000 --- a/agents/addon/extension/chat_transcriber/extension.go +++ /dev/null @@ -1,147 +0,0 @@ -/** - * - * Agora Real Time Engagement - * Created by Wei Hu in 2022-10. - * Copyright (c) 2024 Agora IO. All rights reserved. - * - */ -package extension - -import ( - "chat_transcriber/pb" - "fmt" - "log/slog" - "time" - - "agora.io/rte/rtego" - "google.golang.org/protobuf/proto" -) - -const ( - textDataTextField = "text" - textDataFinalField = "is_final" - textDataStreamIdField = "stream_id" - textDataEndOfSegmentField = "end_of_segment" -) - -var ( - logTag = slog.String("extension", "CHAT_TRANSCRIBER_EXTENSION") -) - -type chatTranscriberExtension struct { - rtego.DefaultExtension - - cachedTextMap map[uint32]string // record the cached text data for each stream id -} - -func newExtension(name string) rtego.Extension { - return &chatTranscriberExtension{ - cachedTextMap: make(map[uint32]string), - } -} - -// OnData receives data from rte graph. -// current supported data: -// - name: text_data -// example: -// {"name": "text_data", "properties": {"text": "hello", "is_final": true, "stream_id": 123, "end_of_segment": true}} -func (p *chatTranscriberExtension) OnData( - rte rtego.Rte, - data rtego.Data, -) { - // Get the text data from data. - text, err := data.GetPropertyString(textDataTextField) - if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataTextField, err), logTag) - return - } - - // Get the 'is_final' flag from data which indicates whether the text is final, - // otherwise it could be overwritten by the next text. - final, err := data.GetPropertyBool(textDataFinalField) - if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataFinalField, err), logTag) - return - } - - // Get the stream id from data. - streamId, err := data.GetPropertyUint32(textDataStreamIdField) - if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataStreamIdField, err), logTag) - return - } - - // Get the 'end_of_segment' flag from data which indicates whether a line break is needed. - endOfSegment, err := data.GetPropertyBool(textDataEndOfSegmentField) - if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataEndOfSegmentField, err), logTag) - return - } - - slog.Debug(fmt.Sprintf( - "OnData %s: %s %s: %t %s: %d %s: %t", - textDataTextField, - text, - textDataFinalField, - final, - textDataStreamIdField, - streamId, - textDataEndOfSegmentField, - endOfSegment), logTag) - - // We cache all final text data and append the non-final text data to the cached data - // until the end of the segment. - if endOfSegment { - if cachedText, ok := p.cachedTextMap[streamId]; ok { - text = cachedText + text - delete(p.cachedTextMap, streamId) - } - } else { - if final { - if cachedText, ok := p.cachedTextMap[streamId]; ok { - text = cachedText + text - p.cachedTextMap[streamId] = text - } else { - p.cachedTextMap[streamId] = text - } - } - } - - pb := pb.Text{ - Uid: int32(streamId), - DataType: "transcribe", - Texttime: time.Now().UnixMilli(), - Words: []*pb.Word{ - { - Text: text, - IsFinal: endOfSegment, - }, - }, - } - - pbData, err := proto.Marshal(&pb) - if err != nil { - slog.Warn(fmt.Sprintf("OnData Marshal error: %v", err), logTag) - return - } - - // convert the origin text data to the protobuf data and send it to the graph. 
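-// (On the receiving side, a consumer of the "data" output can recover the
-// transcript by unmarshalling the payload with the same generated pb package.
-// A minimal sketch, where payload is a hypothetical name for the bytes
-// produced by proto.Marshal above:
-//
-//	var t pb.Text
-//	if err := proto.Unmarshal(payload, &t); err != nil {
-//		// handle decode error
-//	}
-//	for _, w := range t.GetWords() {
-//		fmt.Println(w.GetText(), w.GetIsFinal())
-//	}
-// )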
- rteData, err := rtego.NewData("data") - rteData.SetPropertyBytes("data", pbData) - if err != nil { - slog.Warn(fmt.Sprintf("OnData NewData error: %v", err), logTag) - return - } - - rte.SendData(rteData) -} - -func init() { - slog.Info("chat_transcriber extension init", logTag) - - // Register addon - rtego.RegisterAddonAsExtension( - "chat_transcriber", - rtego.NewDefaultExtensionAddon(newExtension), - ) -} diff --git a/agents/addon/extension/chat_transcriber/go.mod b/agents/addon/extension/chat_transcriber/go.mod deleted file mode 100644 index 311ff8aae..000000000 --- a/agents/addon/extension/chat_transcriber/go.mod +++ /dev/null @@ -1,10 +0,0 @@ -module chat_transcriber - -go 1.18 - -replace agora.io/rte => ../../../interface - -require ( - agora.io/rte v0.0.0-00010101000000-000000000000 - google.golang.org/protobuf v1.34.2 -) diff --git a/agents/addon/extension/chat_transcriber/go.sum b/agents/addon/extension/chat_transcriber/go.sum deleted file mode 100644 index 73d32b16b..000000000 --- a/agents/addon/extension/chat_transcriber/go.sum +++ /dev/null @@ -1,4 +0,0 @@ -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= diff --git a/agents/addon/extension/chat_transcriber/manifest.json b/agents/addon/extension/chat_transcriber/manifest.json deleted file mode 100644 index 269a5a534..000000000 --- a/agents/addon/extension/chat_transcriber/manifest.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "type": "extension", - "name": "chat_transcriber", - "version": "0.1.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.1.0" - } - ], - "api": { - "property": {}, - "data_in": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - }, - "is_final": { - "type": "bool" - }, - "stream_id": { - "type": "uint32" - }, - "end_of_segment": { - "type": "bool" - } - } - } - ], - "data_out": [ - { - "name": "data" - } - ] - } -} \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber/pb/chat_text.pb.go b/agents/addon/extension/chat_transcriber/pb/chat_text.pb.go deleted file mode 100644 index 034473fd0..000000000 --- a/agents/addon/extension/chat_transcriber/pb/chat_text.pb.go +++ /dev/null @@ -1,475 +0,0 @@ -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.33.0 -// protoc (unknown) -// source: chat_text.proto - -package pb - -import ( - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. 
- _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -type Text struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Vendor int32 `protobuf:"varint,1,opt,name=vendor,proto3" json:"vendor,omitempty"` - Version int32 `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"` - Seqnum int32 `protobuf:"varint,3,opt,name=seqnum,proto3" json:"seqnum,omitempty"` - Uid int32 `protobuf:"varint,4,opt,name=uid,proto3" json:"uid,omitempty"` - Flag int32 `protobuf:"varint,5,opt,name=flag,proto3" json:"flag,omitempty"` - Time int64 `protobuf:"varint,6,opt,name=time,proto3" json:"time,omitempty"` // final time =first nofinal time - Lang int32 `protobuf:"varint,7,opt,name=lang,proto3" json:"lang,omitempty"` - Starttime int32 `protobuf:"varint,8,opt,name=starttime,proto3" json:"starttime,omitempty"` - Offtime int32 `protobuf:"varint,9,opt,name=offtime,proto3" json:"offtime,omitempty"` - Words []*Word `protobuf:"bytes,10,rep,name=words,proto3" json:"words,omitempty"` - EndOfSegment bool `protobuf:"varint,11,opt,name=end_of_segment,json=endOfSegment,proto3" json:"end_of_segment,omitempty"` - DurationMs int32 `protobuf:"varint,12,opt,name=duration_ms,json=durationMs,proto3" json:"duration_ms,omitempty"` - DataType string `protobuf:"bytes,13,opt,name=data_type,json=dataType,proto3" json:"data_type,omitempty"` // transcribe ,translate - Trans []*Translation `protobuf:"bytes,14,rep,name=trans,proto3" json:"trans,omitempty"` - Culture string `protobuf:"bytes,15,opt,name=culture,proto3" json:"culture,omitempty"` - Texttime int64 `protobuf:"varint,16,opt,name=texttime,proto3" json:"texttime,omitempty"` // pkg timestamp -} - -func (x *Text) Reset() { - *x = Text{} - if protoimpl.UnsafeEnabled { - mi := &file_chat_text_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Text) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Text) ProtoMessage() {} - -func (x *Text) ProtoReflect() protoreflect.Message { - mi := &file_chat_text_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Text.ProtoReflect.Descriptor instead. 
-func (*Text) Descriptor() ([]byte, []int) { - return file_chat_text_proto_rawDescGZIP(), []int{0} -} - -func (x *Text) GetVendor() int32 { - if x != nil { - return x.Vendor - } - return 0 -} - -func (x *Text) GetVersion() int32 { - if x != nil { - return x.Version - } - return 0 -} - -func (x *Text) GetSeqnum() int32 { - if x != nil { - return x.Seqnum - } - return 0 -} - -func (x *Text) GetUid() int32 { - if x != nil { - return x.Uid - } - return 0 -} - -func (x *Text) GetFlag() int32 { - if x != nil { - return x.Flag - } - return 0 -} - -func (x *Text) GetTime() int64 { - if x != nil { - return x.Time - } - return 0 -} - -func (x *Text) GetLang() int32 { - if x != nil { - return x.Lang - } - return 0 -} - -func (x *Text) GetStarttime() int32 { - if x != nil { - return x.Starttime - } - return 0 -} - -func (x *Text) GetOfftime() int32 { - if x != nil { - return x.Offtime - } - return 0 -} - -func (x *Text) GetWords() []*Word { - if x != nil { - return x.Words - } - return nil -} - -func (x *Text) GetEndOfSegment() bool { - if x != nil { - return x.EndOfSegment - } - return false -} - -func (x *Text) GetDurationMs() int32 { - if x != nil { - return x.DurationMs - } - return 0 -} - -func (x *Text) GetDataType() string { - if x != nil { - return x.DataType - } - return "" -} - -func (x *Text) GetTrans() []*Translation { - if x != nil { - return x.Trans - } - return nil -} - -func (x *Text) GetCulture() string { - if x != nil { - return x.Culture - } - return "" -} - -func (x *Text) GetTexttime() int64 { - if x != nil { - return x.Texttime - } - return 0 -} - -type Word struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Text string `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"` - StartMs int32 `protobuf:"varint,2,opt,name=start_ms,json=startMs,proto3" json:"start_ms,omitempty"` - DurationMs int32 `protobuf:"varint,3,opt,name=duration_ms,json=durationMs,proto3" json:"duration_ms,omitempty"` - IsFinal bool `protobuf:"varint,4,opt,name=is_final,json=isFinal,proto3" json:"is_final,omitempty"` - Confidence float64 `protobuf:"fixed64,5,opt,name=confidence,proto3" json:"confidence,omitempty"` -} - -func (x *Word) Reset() { - *x = Word{} - if protoimpl.UnsafeEnabled { - mi := &file_chat_text_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Word) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Word) ProtoMessage() {} - -func (x *Word) ProtoReflect() protoreflect.Message { - mi := &file_chat_text_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Word.ProtoReflect.Descriptor instead. 
-func (*Word) Descriptor() ([]byte, []int) { - return file_chat_text_proto_rawDescGZIP(), []int{1} -} - -func (x *Word) GetText() string { - if x != nil { - return x.Text - } - return "" -} - -func (x *Word) GetStartMs() int32 { - if x != nil { - return x.StartMs - } - return 0 -} - -func (x *Word) GetDurationMs() int32 { - if x != nil { - return x.DurationMs - } - return 0 -} - -func (x *Word) GetIsFinal() bool { - if x != nil { - return x.IsFinal - } - return false -} - -func (x *Word) GetConfidence() float64 { - if x != nil { - return x.Confidence - } - return 0 -} - -type Translation struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - IsFinal bool `protobuf:"varint,1,opt,name=is_final,json=isFinal,proto3" json:"is_final,omitempty"` - Lang string `protobuf:"bytes,2,opt,name=lang,proto3" json:"lang,omitempty"` - Texts []string `protobuf:"bytes,3,rep,name=texts,proto3" json:"texts,omitempty"` -} - -func (x *Translation) Reset() { - *x = Translation{} - if protoimpl.UnsafeEnabled { - mi := &file_chat_text_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Translation) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Translation) ProtoMessage() {} - -func (x *Translation) ProtoReflect() protoreflect.Message { - mi := &file_chat_text_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Translation.ProtoReflect.Descriptor instead. -func (*Translation) Descriptor() ([]byte, []int) { - return file_chat_text_proto_rawDescGZIP(), []int{2} -} - -func (x *Translation) GetIsFinal() bool { - if x != nil { - return x.IsFinal - } - return false -} - -func (x *Translation) GetLang() string { - if x != nil { - return x.Lang - } - return "" -} - -func (x *Translation) GetTexts() []string { - if x != nil { - return x.Texts - } - return nil -} - -var File_chat_text_proto protoreflect.FileDescriptor - -var file_chat_text_proto_rawDesc = []byte{ - 0x0a, 0x0f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x12, 0x16, 0x61, 0x67, 0x6f, 0x72, 0x61, 0x2e, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x74, 0x72, - 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x72, 0x22, 0xdf, 0x03, 0x0a, 0x04, 0x54, 0x65, - 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x65, 0x6e, 0x64, 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x05, 0x52, 0x06, 0x76, 0x65, 0x6e, 0x64, 0x6f, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x65, 0x71, 0x6e, 0x75, 0x6d, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x65, 0x71, 0x6e, 0x75, 0x6d, 0x12, 0x10, 0x0a, 0x03, - 0x75, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x75, 0x69, 0x64, 0x12, 0x12, - 0x0a, 0x04, 0x66, 0x6c, 0x61, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x66, 0x6c, - 0x61, 0x67, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6c, 0x61, 0x6e, 0x67, 0x18, 0x07, - 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6c, 0x61, 0x6e, 0x67, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x74, - 0x61, 0x72, 0x74, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x08, 0x20, 0x01, 
0x28, 0x05, 0x52, 0x09, 0x73, - 0x74, 0x61, 0x72, 0x74, 0x74, 0x69, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x66, 0x66, 0x74, - 0x69, 0x6d, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x6f, 0x66, 0x66, 0x74, 0x69, - 0x6d, 0x65, 0x12, 0x32, 0x0a, 0x05, 0x77, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x0a, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x1c, 0x2e, 0x61, 0x67, 0x6f, 0x72, 0x61, 0x2e, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x74, - 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x72, 0x2e, 0x57, 0x6f, 0x72, 0x64, 0x52, - 0x05, 0x77, 0x6f, 0x72, 0x64, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x65, 0x6e, 0x64, 0x5f, 0x6f, 0x66, - 0x5f, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, - 0x65, 0x6e, 0x64, 0x4f, 0x66, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x1f, 0x0a, 0x0b, - 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6d, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x0a, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x73, 0x12, 0x1b, 0x0a, - 0x09, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x39, 0x0a, 0x05, 0x74, 0x72, - 0x61, 0x6e, 0x73, 0x18, 0x0e, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x61, 0x67, 0x6f, 0x72, - 0x61, 0x2e, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x62, - 0x65, 0x72, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x05, - 0x74, 0x72, 0x61, 0x6e, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x75, 0x6c, 0x74, 0x75, 0x72, 0x65, - 0x18, 0x0f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x63, 0x75, 0x6c, 0x74, 0x75, 0x72, 0x65, 0x12, - 0x1a, 0x0a, 0x08, 0x74, 0x65, 0x78, 0x74, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x10, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x08, 0x74, 0x65, 0x78, 0x74, 0x74, 0x69, 0x6d, 0x65, 0x22, 0x91, 0x01, 0x0a, 0x04, - 0x57, 0x6f, 0x72, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x74, 0x61, 0x72, - 0x74, 0x5f, 0x6d, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x73, 0x74, 0x61, 0x72, - 0x74, 0x4d, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, - 0x6d, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x4d, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x69, 0x73, 0x5f, 0x66, 0x69, 0x6e, 0x61, 0x6c, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x69, 0x73, 0x46, 0x69, 0x6e, 0x61, 0x6c, 0x12, - 0x1e, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x64, 0x65, 0x6e, 0x63, 0x65, 0x18, 0x05, 0x20, - 0x01, 0x28, 0x01, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x64, 0x65, 0x6e, 0x63, 0x65, 0x22, - 0x52, 0x0a, 0x0b, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x19, - 0x0a, 0x08, 0x69, 0x73, 0x5f, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, - 0x52, 0x07, 0x69, 0x73, 0x46, 0x69, 0x6e, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x6c, 0x61, 0x6e, - 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6c, 0x61, 0x6e, 0x67, 0x12, 0x14, 0x0a, - 0x05, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x05, 0x74, 0x65, - 0x78, 0x74, 0x73, 0x42, 0x06, 0x5a, 0x04, 0x2e, 0x3b, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, -} - -var ( - file_chat_text_proto_rawDescOnce sync.Once - file_chat_text_proto_rawDescData = file_chat_text_proto_rawDesc -) - -func file_chat_text_proto_rawDescGZIP() 
[]byte { - file_chat_text_proto_rawDescOnce.Do(func() { - file_chat_text_proto_rawDescData = protoimpl.X.CompressGZIP(file_chat_text_proto_rawDescData) - }) - return file_chat_text_proto_rawDescData -} - -var file_chat_text_proto_msgTypes = make([]protoimpl.MessageInfo, 3) -var file_chat_text_proto_goTypes = []interface{}{ - (*Text)(nil), // 0: agora.chat_transcriber.Text - (*Word)(nil), // 1: agora.chat_transcriber.Word - (*Translation)(nil), // 2: agora.chat_transcriber.Translation -} -var file_chat_text_proto_depIdxs = []int32{ - 1, // 0: agora.chat_transcriber.Text.words:type_name -> agora.chat_transcriber.Word - 2, // 1: agora.chat_transcriber.Text.trans:type_name -> agora.chat_transcriber.Translation - 2, // [2:2] is the sub-list for method output_type - 2, // [2:2] is the sub-list for method input_type - 2, // [2:2] is the sub-list for extension type_name - 2, // [2:2] is the sub-list for extension extendee - 0, // [0:2] is the sub-list for field type_name -} - -func init() { file_chat_text_proto_init() } -func file_chat_text_proto_init() { - if File_chat_text_proto != nil { - return - } - if !protoimpl.UnsafeEnabled { - file_chat_text_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Text); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_chat_text_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Word); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_chat_text_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Translation); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_chat_text_proto_rawDesc, - NumEnums: 0, - NumMessages: 3, - NumExtensions: 0, - NumServices: 0, - }, - GoTypes: file_chat_text_proto_goTypes, - DependencyIndexes: file_chat_text_proto_depIdxs, - MessageInfos: file_chat_text_proto_msgTypes, - }.Build() - File_chat_text_proto = out.File - file_chat_text_proto_rawDesc = nil - file_chat_text_proto_goTypes = nil - file_chat_text_proto_depIdxs = nil -} diff --git a/agents/addon/extension/chat_transcriber/pb/chat_text.proto b/agents/addon/extension/chat_transcriber/pb/chat_text.proto deleted file mode 100644 index 9ee4e504b..000000000 --- a/agents/addon/extension/chat_transcriber/pb/chat_text.proto +++ /dev/null @@ -1,37 +0,0 @@ -syntax = "proto3"; - -package agora.chat_transcriber; -option go_package = ".;pb"; - -message Text { - int32 vendor = 1; - int32 version = 2; - int32 seqnum = 3; - int32 uid = 4; - int32 flag = 5; - int64 time = 6; // final time =first nofinal time - int32 lang = 7; - int32 starttime = 8; - int32 offtime = 9; - repeated Word words = 10; - bool end_of_segment = 11; - int32 duration_ms = 12; - string data_type = 13; // transcribe ,translate - repeated Translation trans = 14; - string culture = 15; - int64 texttime = 16; // pkg timestamp -} - -message Word { - string text = 1; - int32 start_ms = 2; - int32 duration_ms = 3; - bool is_final = 4; - double confidence = 5; -} - -message Translation { - bool is_final = 1; - string lang = 2; - repeated string texts = 3; -} \ No newline at end of file diff --git 
a/agents/addon/extension/chat_transcriber/property.json b/agents/addon/extension/chat_transcriber/property.json deleted file mode 100644 index 9e26dfeeb..000000000 --- a/agents/addon/extension/chat_transcriber/property.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts.go b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts.go deleted file mode 100644 index 4d712e4f3..000000000 --- a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts.go +++ /dev/null @@ -1,82 +0,0 @@ -/** - * - * Agora Real Time Engagement - * Created by XinHui Li in 2024-07. - * Copyright (c) 2024 Agora IO. All rights reserved. - * - */ -// Note that this is just an example extension written in the GO programming -// language, so the package name does not equal to the containing directory -// name. However, it is not common in Go. -package extension - -import ( - "context" - "fmt" - "io" - "time" - - elevenlabs "github.com/haguro/elevenlabs-go" -) - -type elevenlabsTTS struct { - client *elevenlabs.Client - config elevenlabsTTSConfig -} - -type elevenlabsTTSConfig struct { - ApiKey string - ModelId string - OptimizeStreamingLatency int - RequestTimeoutSeconds int - SimilarityBoost float32 - SpeakerBoost bool - Stability float32 - Style float32 - VoiceId string -} - -func defaultElevenlabsTTSConfig() elevenlabsTTSConfig { - return elevenlabsTTSConfig{ - ApiKey: "", - ModelId: "eleven_multilingual_v2", - OptimizeStreamingLatency: 0, - RequestTimeoutSeconds: 30, - SimilarityBoost: 0.75, - SpeakerBoost: false, - Stability: 0.5, - Style: 0.0, - VoiceId: "pNInz6obpgDQGcFmaJgB", - } -} - -func newElevenlabsTTS(config elevenlabsTTSConfig) (*elevenlabsTTS, error) { - return &elevenlabsTTS{ - config: config, - client: elevenlabs.NewClient(context.Background(), config.ApiKey, time.Duration(config.RequestTimeoutSeconds)*time.Second), - }, nil -} - -func (e *elevenlabsTTS) textToSpeechStream(streamWriter io.Writer, text string) (err error) { - req := elevenlabs.TextToSpeechRequest{ - Text: text, - ModelID: e.config.ModelId, - VoiceSettings: &elevenlabs.VoiceSettings{ - SimilarityBoost: e.config.SimilarityBoost, - SpeakerBoost: e.config.SpeakerBoost, - Stability: e.config.Stability, - Style: e.config.Style, - }, - } - queries := []elevenlabs.QueryFunc{ - elevenlabs.LatencyOptimizations(e.config.OptimizeStreamingLatency), - elevenlabs.OutputFormat("pcm_16000"), - } - - err = e.client.TextToSpeechStream(streamWriter, e.config.VoiceId, req, queries...) - if err != nil { - return fmt.Errorf("TextToSpeechStream failed, err: %v", err) - } - - return nil -} diff --git a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go deleted file mode 100644 index 3b6ef4fd7..000000000 --- a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go +++ /dev/null @@ -1,340 +0,0 @@ -/** - * - * Agora Real Time Engagement - * Created by XinHui Li in 2024-07. - * Copyright (c) 2024 Agora IO. All rights reserved. - * - */ -// Note that this is just an example extension written in the GO programming -// language, so the package name does not equal to the containing directory -// name. However, it is not common in Go. 
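-// A minimal usage sketch for the textToSpeechStream helper in
-// elevenlabs_tts.go above, assuming the deleted helper types are in scope;
-// ELEVENLABS_API_KEY is a hypothetical environment variable, not part of
-// this patch:
-//
-//	cfg := defaultElevenlabsTTSConfig()
-//	cfg.ApiKey = os.Getenv("ELEVENLABS_API_KEY")
-//	tts, err := newElevenlabsTTS(cfg)
-//	if err != nil {
-//		log.Fatal(err)
-//	}
-//	// raw pcm_16000 output: 16 kHz, 16-bit little-endian, mono
-//	f, err := os.Create("out.pcm")
-//	if err != nil {
-//		log.Fatal(err)
-//	}
-//	defer f.Close()
-//	if err := tts.textToSpeechStream(f, "Hello from ElevenLabs"); err != nil {
-//		log.Fatal(err)
-//	}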
-package extension - -import ( - "fmt" - "io" - "log/slog" - "sync" - "sync/atomic" - "time" - - "agora.io/rte/rtego" -) - -const ( - cmdInFlush = "flush" - cmdOutFlush = "flush" - dataInTextDataPropertyText = "text" - - propertyApiKey = "api_key" // Required - propertyModelId = "model_id" // Optional - propertyOptimizeStreamingLatency = "optimize_streaming_latency" // Optional - propertyRequestTimeoutSeconds = "request_timeout_seconds" // Optional - propertySimilarityBoost = "similarity_boost" // Optional - propertySpeakerBoost = "speaker_boost" // Optional - propertyStability = "stability" // Optional - propertyStyle = "style" // Optional - propertyVoiceId = "voice_id" // Optional -) - -const ( - textChanMax = 1024 -) - -var ( - logTag = slog.String("extension", "ELEVENLABS_TTS_EXTENSION") - - outdateTs atomic.Int64 - textChan chan *message - wg sync.WaitGroup -) - -type elevenlabsTTSExtension struct { - rtego.DefaultExtension - elevenlabsTTS *elevenlabsTTS -} - -type message struct { - text string - receivedTs int64 -} - -func newElevenlabsTTSExtension(name string) rtego.Extension { - return &elevenlabsTTSExtension{} -} - -// OnStart will be called when the extension is starting, -// properies can be read here to initialize and start the extension. -// current supported properties: -// - api_key (required) -// - model_id -// - optimize_streaming_latency -// - request_timeout_seconds -// - similarity_boost -// - speaker_boost -// - stability -// - style -// - voice_id -func (e *elevenlabsTTSExtension) OnStart(rte rtego.Rte) { - slog.Info("OnStart", logTag) - - // prepare configuration - elevenlabsTTSConfig := defaultElevenlabsTTSConfig() - - if apiKey, err := rte.GetPropertyString(propertyApiKey); err != nil { - slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) - return - } else { - elevenlabsTTSConfig.ApiKey = apiKey - } - - if modelId, err := rte.GetPropertyString(propertyModelId); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModelId, err), logTag) - } else { - if len(modelId) > 0 { - elevenlabsTTSConfig.ModelId = modelId - } - } - - if optimizeStreamingLatency, err := rte.GetPropertyInt64(propertyOptimizeStreamingLatency); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyOptimizeStreamingLatency, err), logTag) - } else { - if optimizeStreamingLatency > 0 { - elevenlabsTTSConfig.OptimizeStreamingLatency = int(optimizeStreamingLatency) - } - } - - if requestTimeoutSeconds, err := rte.GetPropertyInt64(propertyRequestTimeoutSeconds); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err), logTag) - } else { - if requestTimeoutSeconds > 0 { - elevenlabsTTSConfig.RequestTimeoutSeconds = int(requestTimeoutSeconds) - } - } - - if similarityBoost, err := rte.GetPropertyFloat64(propertySimilarityBoost); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySimilarityBoost, err), logTag) - } else { - elevenlabsTTSConfig.SimilarityBoost = float32(similarityBoost) - } - - if speakerBoost, err := rte.GetPropertyBool(propertySpeakerBoost); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySpeakerBoost, err), logTag) - } else { - elevenlabsTTSConfig.SpeakerBoost = speakerBoost - } - - if stability, err := rte.GetPropertyFloat64(propertyStability); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyStability, 
err), logTag) - } else { - elevenlabsTTSConfig.Stability = float32(stability) - } - - if style, err := rte.GetPropertyFloat64(propertyStyle); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyStyle, err), logTag) - } else { - elevenlabsTTSConfig.Style = float32(style) - } - - if voiceId, err := rte.GetPropertyString(propertyVoiceId); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyVoiceId, err), logTag) - } else { - if len(voiceId) > 0 { - elevenlabsTTSConfig.VoiceId = voiceId - } - } - - // create elevenlabsTTS instance - elevenlabsTTS, err := newElevenlabsTTS(elevenlabsTTSConfig) - if err != nil { - slog.Error(fmt.Sprintf("newElevenlabsTTS failed, err: %v", err), logTag) - return - } - - slog.Info(fmt.Sprintf("newElevenlabsTTS succeed with ModelId: %s, VoiceId: %s", - elevenlabsTTSConfig.ModelId, elevenlabsTTSConfig.VoiceId), logTag) - - // set elevenlabsTTS instance - e.elevenlabsTTS = elevenlabsTTS - - // create pcm instance - pcm := newPcm(defaultPcmConfig()) - pcmFrameSize := pcm.getPcmFrameSize() - - // init chan - textChan = make(chan *message, textChanMax) - - go func() { - slog.Info("process textChan", logTag) - - for msg := range textChan { - if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt - slog.Info(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", - msg.text, msg.receivedTs, outdateTs.Load()), logTag) - continue - } - - wg.Add(1) - slog.Info(fmt.Sprintf("textChan text: [%s]", msg.text), logTag) - - r, w := io.Pipe() - startTime := time.Now() - - go func() { - defer wg.Done() - defer w.Close() - - slog.Info(fmt.Sprintf("textToSpeechStream text: [%s]", msg.text), logTag) - - err = e.elevenlabsTTS.textToSpeechStream(w, msg.text) - if err != nil { - slog.Error(fmt.Sprintf("textToSpeechStream failed, err: %v", err), logTag) - return - } - }() - - slog.Info(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize), logTag) - - var ( - firstFrameLatency int64 - n int - pcmFrameRead int - readBytes int - sentFrames int - ) - buf := pcm.newBuf() - - // read pcm stream - for { - if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt - slog.Info(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", - msg.text, msg.receivedTs, outdateTs.Load()), logTag) - break - } - - n, err = r.Read(buf[pcmFrameRead:]) - readBytes += n - pcmFrameRead += n - - if err != nil { - if err == io.EOF { - slog.Info("read pcm stream EOF", logTag) - break - } - - slog.Error(fmt.Sprintf("read pcm stream failed, err: %v", err), logTag) - break - } - - if pcmFrameRead != pcmFrameSize { - slog.Debug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead), logTag) - continue - } - - pcm.send(rte, buf) - // clear buf - buf = pcm.newBuf() - pcmFrameRead = 0 - sentFrames++ - - if firstFrameLatency == 0 { - firstFrameLatency = time.Since(startTime).Milliseconds() - slog.Info(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency), logTag) - } - - slog.Debug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text), logTag) - } - - if pcmFrameRead > 0 { - pcm.send(rte, buf) - sentFrames++ - slog.Info(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead), logTag) - } - - r.Close() - slog.Info(fmt.Sprintf("send pcm data finished, text: 
[%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms", - msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds()), logTag) - } - }() - - rte.OnStartDone() -} - -// OnCmd receives cmd from rte graph. -// current supported cmd: -// - name: flush -// example: -// {"name": "flush"} -func (e *elevenlabsTTSExtension) OnCmd( - rte rtego.Rte, - cmd rtego.Cmd, -) { - cmdName, err := cmd.CmdName() - if err != nil { - slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) - return - } - - slog.Info(fmt.Sprintf("OnCmd %s", cmdInFlush), logTag) - - switch cmdName { - case cmdInFlush: - outdateTs.Store(time.Now().UnixMicro()) - - // send out - outCmd, err := rtego.NewCmd(cmdOutFlush) - if err != nil { - slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) - return - } - - if err := rte.SendCmd(outCmd, nil); err != nil { - slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) - return - } else { - slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) - } - } - - rte.ReturnString(rtego.Ok, "ok", cmd) -} - -// OnData receives data from rte graph. -// current supported data: -// - name: text_data -// example: -// {name: text_data, properties: {text: "hello"} -func (e *elevenlabsTTSExtension) OnData( - rte rtego.Rte, - data rtego.Data, -) { - text, err := data.GetPropertyString(dataInTextDataPropertyText) - if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag) - return - } - - if len(text) == 0 { - slog.Debug("OnData text is empty, ignored", logTag) - return - } - - slog.Info(fmt.Sprintf("OnData input text: [%s]", text), logTag) - - go func() { - textChan <- &message{text: text, receivedTs: time.Now().UnixMicro()} - }() -} - -func init() { - slog.Info("elevenlabs_tts extension init", logTag) - - // Register addon - rtego.RegisterAddonAsExtension( - "elevenlabs_tts", - rtego.NewDefaultExtensionAddon(newElevenlabsTTSExtension), - ) -} diff --git a/agents/addon/extension/elevenlabs_tts/go.mod b/agents/addon/extension/elevenlabs_tts/go.mod deleted file mode 100644 index bb90f1c61..000000000 --- a/agents/addon/extension/elevenlabs_tts/go.mod +++ /dev/null @@ -1,10 +0,0 @@ -module elevenlabs_tts - -go 1.21 - -replace agora.io/rte => ../../../interface - -require ( - agora.io/rte v0.0.0-00010101000000-000000000000 - github.com/haguro/elevenlabs-go v0.2.4 -) diff --git a/agents/addon/extension/elevenlabs_tts/go.sum b/agents/addon/extension/elevenlabs_tts/go.sum deleted file mode 100644 index 6c1feddc6..000000000 --- a/agents/addon/extension/elevenlabs_tts/go.sum +++ /dev/null @@ -1,2 +0,0 @@ -github.com/haguro/elevenlabs-go v0.2.4 h1:Z1a/I+b5fAtGSfrhEj97dYG1EbV9uRzSfvz5n5+ud34= -github.com/haguro/elevenlabs-go v0.2.4/go.mod h1:j15h9w2BpgxlIGWXmCKWPPDaTo2QAO83zFy5J+pFCt8= diff --git a/agents/addon/extension/elevenlabs_tts/manifest.json b/agents/addon/extension/elevenlabs_tts/manifest.json deleted file mode 100644 index 620fb2248..000000000 --- a/agents/addon/extension/elevenlabs_tts/manifest.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "type": "extension", - "name": "elevenlabs_tts", - "version": "0.1.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, - { - "type": "system", - "name": 
"rte_runtime_go", - "version": "0.1.0" - } - ], - "api": { - "property": { - "api_key": { - "type": "string" - }, - "model_id": { - "type": "string" - }, - "request_timeout_seconds": { - "type": "int64" - }, - "similarity_boost": { - "type": "float64" - }, - "speaker_boost": { - "type": "bool" - }, - "stability": { - "type": "float64" - }, - "style": { - "type": "float64" - }, - "optimize_streaming_latency": { - "type": "int64" - }, - "voice_id": { - "type": "string" - } - }, - "data_in": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - } - } - } - ], - "cmd_in": [ - { - "name": "flush" - } - ], - "cmd_out": [ - { - "name": "flush" - } - ], - "pcm_frame_out": [ - { - "name": "pcm_frame" - } - ] - } -} diff --git a/agents/addon/extension/elevenlabs_tts/pcm.go b/agents/addon/extension/elevenlabs_tts/pcm.go deleted file mode 100644 index c3454b102..000000000 --- a/agents/addon/extension/elevenlabs_tts/pcm.go +++ /dev/null @@ -1,104 +0,0 @@ -/** - * - * Agora Real Time Engagement - * Created by XinHui Li in 2024-07. - * Copyright (c) 2024 Agora IO. All rights reserved. - * - */ -// Note that this is just an example extension written in the GO programming -// language, so the package name does not equal to the containing directory -// name. However, it is not common in Go. -package extension - -import ( - "fmt" - "log/slog" - - "agora.io/rte/rtego" -) - -type pcm struct { - config *pcmConfig -} - -type pcmConfig struct { - BytesPerSample int32 - Channel int32 - ChannelLayout uint64 - Name string - SampleRate int32 - SamplesPerChannel int32 - Timestamp int64 -} - -func defaultPcmConfig() *pcmConfig { - return &pcmConfig{ - BytesPerSample: 2, - Channel: 1, - ChannelLayout: 1, - Name: "pcm_frame", - SampleRate: 16000, - SamplesPerChannel: 16000 / 100, - Timestamp: 0, - } -} - -func newPcm(config *pcmConfig) *pcm { - return &pcm{ - config: config, - } -} - -func (p *pcm) getPcmFrame(buf []byte) (pcmFrame rtego.PcmFrame, err error) { - pcmFrame, err = rtego.NewPcmFrame(p.config.Name) - if err != nil { - slog.Error(fmt.Sprintf("NewPcmFrame failed, err: %v", err), logTag) - return - } - - // set pcm frame - pcmFrame.SetBytesPerSample(p.config.BytesPerSample) - pcmFrame.SetSampleRate(p.config.SampleRate) - pcmFrame.SetChannelLayout(p.config.ChannelLayout) - pcmFrame.SetNumberOfChannels(p.config.Channel) - pcmFrame.SetTimestamp(p.config.Timestamp) - pcmFrame.SetDataFmt(rtego.PcmFrameDataFmtInterleave) - pcmFrame.SetSamplesPerChannel(p.config.SamplesPerChannel) - pcmFrame.AllocBuf(p.getPcmFrameSize()) - - borrowedBuf, err := pcmFrame.BorrowBuf() - if err != nil { - slog.Error(fmt.Sprintf("BorrowBuf failed, err: %v", err), logTag) - return - } - - // copy data - copy(borrowedBuf, buf) - - pcmFrame.GiveBackBuf(&borrowedBuf) - return -} - -func (p *pcm) getPcmFrameSize() int { - return int(p.config.SamplesPerChannel * p.config.Channel * p.config.BytesPerSample) -} - -func (p *pcm) newBuf() []byte { - return make([]byte, p.getPcmFrameSize()) -} - -func (p *pcm) send(rte rtego.Rte, buf []byte) (err error) { - pcmFrame, err := p.getPcmFrame(buf) - if err != nil { - slog.Error(fmt.Sprintf("getPcmFrame failed, err: %v", err), logTag) - return - } - - // send pcm - if err = rte.SendPcmFrame(pcmFrame); err != nil { - slog.Error(fmt.Sprintf("SendPcmFrame failed, err: %v", err), logTag) - return - } - - return -} diff --git a/agents/addon/extension/elevenlabs_tts/property.json b/agents/addon/extension/elevenlabs_tts/property.json deleted file mode 100644 index 9e26dfeeb..000000000 --- 
a/agents/addon/extension/elevenlabs_tts/property.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/agents/addon/extension/interrupt_detector/extension.go b/agents/addon/extension/interrupt_detector/extension.go deleted file mode 100644 index 962940e06..000000000 --- a/agents/addon/extension/interrupt_detector/extension.go +++ /dev/null @@ -1,78 +0,0 @@ -/** - * - * Agora Real Time Engagement - * Created by Wei Hu in 2022-10. - * Copyright (c) 2024 Agora IO. All rights reserved. - * - */ -// Note that this is just an example extension written in the GO programming -// language, so the package name does not equal to the containing directory -// name. However, it is not common in Go. -package extension - -import ( - "fmt" - "log/slog" - - "agora.io/rte/rtego" -) - -const ( - textDataTextField = "text" - textDataFinalField = "is_final" - - cmdNameFlush = "flush" -) - -var ( - logTag = slog.String("extension", "INTERRUPT_DETECTOR_EXTENSION") -) - -type interruptDetectorExtension struct { - rtego.DefaultExtension -} - -func newExtension(name string) rtego.Extension { - return &interruptDetectorExtension{} -} - -// OnData receives data from rte graph. -// current supported data: -// - name: text_data -// example: -// {name: text_data, properties: {text: "hello", is_final: false} -func (p *interruptDetectorExtension) OnData( - rte rtego.Rte, - data rtego.Data, -) { - text, err := data.GetPropertyString(textDataTextField) - if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataTextField, err), logTag) - return - } - - final, err := data.GetPropertyBool(textDataFinalField) - if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataFinalField, err), logTag) - return - } - - slog.Debug(fmt.Sprintf("OnData %s: %s %s: %t", textDataTextField, text, textDataFinalField, final), logTag) - - if final || len(text) >= 2 { - flushCmd, _ := rtego.NewCmd(cmdNameFlush) - rte.SendCmd(flushCmd, nil) - - slog.Info(fmt.Sprintf("sent cmd: %s", cmdNameFlush), logTag) - } -} - -func init() { - slog.Info("interrupt_detector extension init", logTag) - - // Register addon - rtego.RegisterAddonAsExtension( - "interrupt_detector", - rtego.NewDefaultExtensionAddon(newExtension), - ) -} diff --git a/agents/addon/extension/interrupt_detector/go.mod b/agents/addon/extension/interrupt_detector/go.mod deleted file mode 100644 index bced26e06..000000000 --- a/agents/addon/extension/interrupt_detector/go.mod +++ /dev/null @@ -1,7 +0,0 @@ -module extension - -go 1.18 - -replace agora.io/rte => ../../../interface - -require agora.io/rte v0.0.0-00010101000000-000000000000 diff --git a/agents/addon/extension/interrupt_detector/manifest.json b/agents/addon/extension/interrupt_detector/manifest.json deleted file mode 100644 index 05781cf5a..000000000 --- a/agents/addon/extension/interrupt_detector/manifest.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "type": "extension", - "name": "interrupt_detector", - "version": "0.1.0", - "language": "go", - "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.1.0" - } - ], - "api": { - "data_in": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - }, - "is_final": { - "type": "bool" - } - } - } - ], - "cmd_out": [ - { - "name": "flush" - } - ] - } -} \ No newline at end of file diff --git a/agents/addon/extension/interrupt_detector/property.json 
b/agents/addon/extension/interrupt_detector/property.json deleted file mode 100644 index 9e26dfeeb..000000000 --- a/agents/addon/extension/interrupt_detector/property.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt/README.md b/agents/addon/extension/openai_chatgpt/README.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/agents/addon/extension/openai_chatgpt/go.mod b/agents/addon/extension/openai_chatgpt/go.mod deleted file mode 100644 index 5bb6b52b8..000000000 --- a/agents/addon/extension/openai_chatgpt/go.mod +++ /dev/null @@ -1,17 +0,0 @@ -module openai_chatgpt - -go 1.21 - -replace agora.io/rte => ../../../interface - -require ( - agora.io/rte v0.0.0-00010101000000-000000000000 - github.com/sashabaranov/go-openai v1.24.1 - github.com/stretchr/testify v1.9.0 -) - -require ( - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect -) diff --git a/agents/addon/extension/openai_chatgpt/go.sum b/agents/addon/extension/openai_chatgpt/go.sum deleted file mode 100644 index 64a09f354..000000000 --- a/agents/addon/extension/openai_chatgpt/go.sum +++ /dev/null @@ -1,12 +0,0 @@ -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/sashabaranov/go-openai v1.24.1 h1:DWK95XViNb+agQtuzsn+FyHhn3HQJ7Va8z04DQDJ1MI= -github.com/sashabaranov/go-openai v1.24.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/agents/addon/extension/openai_chatgpt/manifest.json b/agents/addon/extension/openai_chatgpt/manifest.json deleted file mode 100644 index aa4f6d50d..000000000 --- a/agents/addon/extension/openai_chatgpt/manifest.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "type": "extension", - "name": "openai_chatgpt", - "version": "0.1.0", - "language": "go", - "support": [], - "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, - { - "type": "system", - "name": "rte_runtime_go", - "version": "0.1.0" - } - ], - "api": { - "property": { - "api_key": { - "type": "string" - }, - "frequency_penalty": { - "type": "float64" - }, - "presence_penalty": { - "type": "float64" - }, - "model": { - "type": "string" - }, - "max_tokens": { - "type": "int64" - }, - "prompt": { - "type": "string" - }, - "greeting": { - "type": "string" - }, - "max_memory_length": { - "type": "int64" - } - }, - "data_in": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - }, - "is_final": { - "type": "bool" - } - } - } - ], - "data_out": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - }, - "end_of_segment": { - "type": "bool" - } - } - } - ], - 
"cmd_in": [ - { - "name": "flush" - } - ], - "cmd_out": [ - { - "name": "flush" - } - ] - } -} \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt/openai_chatgpt.go b/agents/addon/extension/openai_chatgpt/openai_chatgpt.go deleted file mode 100644 index 1a09ed141..000000000 --- a/agents/addon/extension/openai_chatgpt/openai_chatgpt.go +++ /dev/null @@ -1,111 +0,0 @@ -/** - * - * Agora Real Time Engagement - * Created by lixinhui in 2024. - * Copyright (c) 2024 Agora IO. All rights reserved. - * - */ -// Note that this is just an example extension written in the GO programming -// language, so the package name does not equal to the containing directory -// name. However, it is not common in Go. -package extension - -import ( - "context" - "fmt" - "math/rand" - "net/http" - "net/url" - - openai "github.com/sashabaranov/go-openai" -) - -type openaiChatGPT struct { - client *openai.Client - config openaiChatGPTConfig -} - -type openaiChatGPTConfig struct { - BaseUrl string - ApiKey string - - Model string - Prompt string - - FrequencyPenalty float32 - PresencePenalty float32 - TopP float32 - Temperature float32 - MaxTokens int - Seed int - - ProxyUrl string -} - -func defaultOpenaiChatGPTConfig() openaiChatGPTConfig { - return openaiChatGPTConfig{ - BaseUrl: "https://api.openai.com/v1", - ApiKey: "", - - Model: openai.GPT4o, - Prompt: "You are a voice assistant who talks in a conversational way and can chat with me like my friends. i will speak to you in english or chinese, and you will answer in the corrected and improved version of my text with the language i use. Don't talk like a robot, instead i would like you to talk like real human with emotions. i will use your answer for text-to-speech, so don't return me any meaningless characters. I want you to be helpful, when i'm asking you for advices, give me precise, practical and useful advices instead of being vague. 
When giving me list of options, express the options in a narrative way instead of bullet points.", - - FrequencyPenalty: 0.9, - PresencePenalty: 0.9, - TopP: 1.0, - Temperature: 0.1, - MaxTokens: 512, - Seed: rand.Int(), - - ProxyUrl: "", - } -} - -func newOpenaiChatGPT(config openaiChatGPTConfig) (*openaiChatGPT, error) { - conf := openai.DefaultConfig(config.ApiKey) - - if config.BaseUrl != "" { - conf.BaseURL = config.BaseUrl - } - - if config.ProxyUrl != "" { - proxyUrl, err := url.Parse(config.ProxyUrl) - if err != nil { - return nil, fmt.Errorf("newOpenaiChatGPT failed on parsing proxy url, err: %v", err) - } - conf.HTTPClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}} - } - - return &openaiChatGPT{ - config: config, - client: openai.NewClientWithConfig(conf), - }, nil -} - -func (c *openaiChatGPT) getChatCompletionsStream(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionStream, error) { - req := openai.ChatCompletionRequest{ - Temperature: c.config.Temperature, - TopP: c.config.TopP, - PresencePenalty: c.config.PresencePenalty, - FrequencyPenalty: c.config.FrequencyPenalty, - MaxTokens: c.config.MaxTokens, - Seed: &c.config.Seed, - Messages: append( - []openai.ChatCompletionMessage{ - { - Role: openai.ChatMessageRoleSystem, - Content: c.config.Prompt, - }, - }, - messages..., - ), - Model: c.config.Model, - Stream: true, - } - - resp, err := c.client.CreateChatCompletionStream(context.Background(), req) - if err != nil { - return nil, fmt.Errorf("CreateChatCompletionStream failed,err: %v", err) - } - return resp, nil -} diff --git a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go deleted file mode 100644 index 6abdaa515..000000000 --- a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go +++ /dev/null @@ -1,391 +0,0 @@ -/** - * - * Agora Real Time Engagement - * Created by lixinhui in 2024. - * Copyright (c) 2024 Agora IO. All rights reserved. - * - */ -// Note that this is just an example extension written in the GO programming -// language, so the package name does not equal to the containing directory -// name. However, it is not common in Go. 
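-// A minimal usage sketch for the getChatCompletionsStream helper in
-// openai_chatgpt.go above, assuming the deleted helper types are in scope;
-// OPENAI_API_KEY is a hypothetical environment variable, not part of this
-// patch:
-//
-//	cfg := defaultOpenaiChatGPTConfig()
-//	cfg.ApiKey = os.Getenv("OPENAI_API_KEY")
-//	gpt, err := newOpenaiChatGPT(cfg)
-//	if err != nil {
-//		log.Fatal(err)
-//	}
-//	stream, err := gpt.getChatCompletionsStream([]openai.ChatCompletionMessage{
-//		{Role: openai.ChatMessageRoleUser, Content: "Say hi in one sentence."},
-//	})
-//	if err != nil {
-//		log.Fatal(err)
-//	}
-//	defer stream.Close()
-//	for {
-//		chunk, err := stream.Recv()
-//		if errors.Is(err, io.EOF) {
-//			break
-//		}
-//		if err != nil {
-//			log.Fatal(err)
-//		}
-//		if len(chunk.Choices) > 0 {
-//			fmt.Print(chunk.Choices[0].Delta.Content)
-//		}
-//	}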
-package extension - -import ( - "errors" - "fmt" - "io" - "log/slog" - "sync" - "sync/atomic" - "time" - - "agora.io/rte/rtego" - openai "github.com/sashabaranov/go-openai" -) - -var ( - logTag = slog.String("extension", "OPENAI_CHATGPT_EXTENSION") -) - -type openaiChatGPTExtension struct { - rtego.DefaultExtension - openaiChatGPT *openaiChatGPT -} - -const ( - cmdInFlush = "flush" - cmdOutFlush = "flush" - dataInTextDataPropertyText = "text" - dataInTextDataPropertyIsFinal = "is_final" - dataOutTextDataPropertyText = "text" - dataOutTextDataPropertyTextEndOfSegment = "end_of_segment" - - propertyBaseUrl = "base_url" // Optional - propertyApiKey = "api_key" // Required - propertyModel = "model" // Optional - propertyPrompt = "prompt" // Optional - propertyFrequencyPenalty = "frequency_penalty" // Optional - propertyPresencePenalty = "presence_penalty" // Optional - propertyTemperature = "temperature" // Optional - propertyTopP = "top_p" // Optional - propertyMaxTokens = "max_tokens" // Optional - propertyGreeting = "greeting" // Optional - propertyProxyUrl = "proxy_url" // Optional - propertyMaxMemoryLength = "max_memory_length" // Optional -) - -var ( - memory []openai.ChatCompletionMessage - memoryChan chan openai.ChatCompletionMessage - maxMemoryLength = 10 - - outdateTs atomic.Int64 - wg sync.WaitGroup -) - -func newChatGPTExtension(name string) rtego.Extension { - return &openaiChatGPTExtension{} -} - -// OnStart will be called when the extension is starting, -// properies can be read here to initialize and start the extension. -// current supported properties: -// - api_key (required) -// - model -// - prompt -// - frequency_penalty -// - presence_penalty -// - temperature -// - top_p -// - max_tokens -// - greeting -// - proxy_url -func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { - slog.Info("OnStart", logTag) - - // prepare configuration - openaiChatGPTConfig := defaultOpenaiChatGPTConfig() - - if baseUrl, err := rte.GetPropertyString(propertyBaseUrl); err != nil { - slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyBaseUrl, err), logTag) - } else { - if len(baseUrl) > 0 { - openaiChatGPTConfig.BaseUrl = baseUrl - } - } - - if apiKey, err := rte.GetPropertyString(propertyApiKey); err != nil { - slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) - return - } else { - openaiChatGPTConfig.ApiKey = apiKey - } - - if model, err := rte.GetPropertyString(propertyModel); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyModel, err), logTag) - } else { - if len(model) > 0 { - openaiChatGPTConfig.Model = model - } - } - - if prompt, err := rte.GetPropertyString(propertyPrompt); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyPrompt, err), logTag) - } else { - if len(prompt) > 0 { - openaiChatGPTConfig.Prompt = prompt - } - } - - if frequencyPenalty, err := rte.GetPropertyFloat64(propertyFrequencyPenalty); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyFrequencyPenalty, err), logTag) - } else { - openaiChatGPTConfig.FrequencyPenalty = float32(frequencyPenalty) - } - - if presencePenalty, err := rte.GetPropertyFloat64(propertyPresencePenalty); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyPresencePenalty, err), logTag) - } else { - openaiChatGPTConfig.PresencePenalty = float32(presencePenalty) - } - - if temperature, err := rte.GetPropertyFloat64(propertyTemperature); err 
!= nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTemperature, err), logTag) - } else { - openaiChatGPTConfig.Temperature = float32(temperature) - } - - if topP, err := rte.GetPropertyFloat64(propertyTopP); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTopP, err), logTag) - } else { - openaiChatGPTConfig.TopP = float32(topP) - } - - if maxTokens, err := rte.GetPropertyInt64(propertyMaxTokens); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxTokens, err), logTag) - } else { - if maxTokens > 0 { - openaiChatGPTConfig.MaxTokens = int(maxTokens) - } - } - - if proxyUrl, err := rte.GetPropertyString(propertyProxyUrl); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyProxyUrl, err), logTag) - } else { - openaiChatGPTConfig.ProxyUrl = proxyUrl - } - - greeting, err := rte.GetPropertyString(propertyGreeting) - if err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyGreeting, err), logTag) - } - - if propMaxMemoryLength, err := rte.GetPropertyInt64(propertyMaxMemoryLength); err != nil { - slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxMemoryLength, err), logTag) - } else { - if propMaxMemoryLength > 0 { - maxMemoryLength = int(propMaxMemoryLength) - } - } - - // create openaiChatGPT instance - openaiChatgpt, err := newOpenaiChatGPT(openaiChatGPTConfig) - if err != nil { - slog.Error(fmt.Sprintf("newOpenaiChatGPT failed, err: %v", err), logTag) - return - } - slog.Info(fmt.Sprintf("newOpenaiChatGPT succeed with max_tokens: %d, model: %s", - openaiChatGPTConfig.MaxTokens, openaiChatGPTConfig.Model), logTag) - - p.openaiChatGPT = openaiChatgpt - - memoryChan = make(chan openai.ChatCompletionMessage, maxMemoryLength*2) - - // send greeting if available - if len(greeting) > 0 { - outputData, _ := rtego.NewData("text_data") - outputData.SetProperty(dataOutTextDataPropertyText, greeting) - outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) - if err := rte.SendData(outputData); err != nil { - slog.Error(fmt.Sprintf("greeting [%s] send failed, err: %v", greeting, err), logTag) - } else { - slog.Info(fmt.Sprintf("greeting [%s] sent", greeting), logTag) - } - } - - rte.OnStartDone() -} - -// OnCmd receives cmd from rte graph. -// current supported cmd: -// - name: flush -// example: -// {"name": "flush"} -func (p *openaiChatGPTExtension) OnCmd( - rte rtego.Rte, - cmd rtego.Cmd, -) { - cmdName, err := cmd.CmdName() - if err != nil { - slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) - return - } - slog.Info(fmt.Sprintf("OnCmd %s", cmdInFlush), logTag) - - switch cmdName { - case cmdInFlush: - outdateTs.Store(time.Now().UnixMicro()) - - wg.Wait() // wait for chat completion stream to finish - - // send out - outCmd, err := rtego.NewCmd(cmdOutFlush) - if err != nil { - slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) - return - } - if err := rte.SendCmd(outCmd, nil); err != nil { - slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) - return - } else { - slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) - } - } - rte.ReturnString(rtego.Ok, "ok", cmd) -} - -// OnData receives data from rte graph. 
-// current supported data: -// - name: text_data -// example: -// {"name": "text_data", "properties": {"text": "hello", "is_final": true} -func (p *openaiChatGPTExtension) OnData( - rte rtego.Rte, - data rtego.Data, -) { - // Get isFinal - isFinal, err := data.GetPropertyBool(dataInTextDataPropertyIsFinal) - if err != nil { - slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyIsFinal, err), logTag) - return - } - if !isFinal { // ignore non-final - slog.Debug("ignore non-final input", logTag) - return - } - - // Get input text - inputText, err := data.GetPropertyString(dataInTextDataPropertyText) - if err != nil { - slog.Error(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag) - return - } - if len(inputText) == 0 { - slog.Debug("ignore empty text", logTag) - return - } - slog.Info(fmt.Sprintf("OnData input text: [%s]", inputText), logTag) - - // prepare memory - for len(memoryChan) > 0 { - m, ok := <-memoryChan - if !ok { - break - } - memory = append(memory, m) - if len(memory) > maxMemoryLength { - memory = memory[1:] - } - } - memory = append(memory, openai.ChatCompletionMessage{ - Role: openai.ChatMessageRoleUser, - Content: inputText, - }) - if len(memory) > maxMemoryLength { - memory = memory[1:] - } - - // start goroutine to request and read responses from openai - wg.Add(1) - go func(startTime time.Time, inputText string, memory []openai.ChatCompletionMessage) { - defer wg.Done() - slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] memory: %v", inputText, memory), logTag) - - // Get result from ai - resp, err := p.openaiChatGPT.getChatCompletionsStream(memory) - if err != nil { - slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] failed, err: %v", inputText, err), logTag) - return - } - defer func() { - if resp != nil { // Close stream object - resp.Close() - } - }() - slog.Debug(fmt.Sprintf("GetChatCompletionsStream start to recv for input text: [%s]", inputText), logTag) - - var sentence, fullContent string - var firstSentenceSent bool - for { - if startTime.UnixMicro() < outdateTs.Load() { // Check whether to interrupt - slog.Info(fmt.Sprintf("GetChatCompletionsStream recv interrupt and flushing for input text: [%s], startTs: %d, outdateTs: %d", - inputText, startTime.UnixMicro(), outdateTs.Load()), logTag) - break - } - - chatCompletions, err := resp.Recv() - if errors.Is(err, io.EOF) { - slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s], io.EOF break", inputText), logTag) - break - } - - var content string - if len(chatCompletions.Choices) > 0 && chatCompletions.Choices[0].Delta.Content != "" { - content = chatCompletions.Choices[0].Delta.Content - } - fullContent += content - - for { - // feed content and check whether sentence is available - var sentenceIsFinal bool - sentence, content, sentenceIsFinal = parseSentence(sentence, content) - if len(sentence) == 0 || !sentenceIsFinal { - slog.Debug(fmt.Sprintf("sentence %s is empty or not final", sentence), logTag) - break - } - slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] got sentence: [%s]", inputText, sentence), logTag) - - // send sentence - outputData, err := rtego.NewData("text_data") - if err != nil { - slog.Error(fmt.Sprintf("NewData failed, err: %v", err), logTag) - break - } - outputData.SetProperty(dataOutTextDataPropertyText, sentence) - outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, false) - if err := rte.SendData(outputData); err 
!= nil { - slog.Error(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] send sentence [%s] failed, err: %v", inputText, sentence, err), logTag) - break - } else { - slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] sent sentence [%s]", inputText, sentence), logTag) - } - sentence = "" - - if !firstSentenceSent { - firstSentenceSent = true - slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] first sentence sent, first_sentency_latency %dms", - inputText, time.Since(startTime).Milliseconds()), logTag) - } - } - } - - // remember response as assistant content in memory - memoryChan <- openai.ChatCompletionMessage{ - Role: openai.ChatMessageRoleAssistant, - Content: fullContent, - } - - // send end of segment - outputData, _ := rtego.NewData("text_data") - outputData.SetProperty(dataOutTextDataPropertyText, sentence) - outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) - if err := rte.SendData(outputData); err != nil { - slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] send failed, err: %v", inputText, sentence, err), logTag) - } else { - slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] sent", inputText, sentence), logTag) - } - }(time.Now(), inputText, append([]openai.ChatCompletionMessage{}, memory...)) -} - -func init() { - slog.Info("init") - - // Register addon - rtego.RegisterAddonAsExtension( - "openai_chatgpt", - rtego.NewDefaultExtensionAddon(newChatGPTExtension), - ) -} diff --git a/agents/addon/extension/openai_chatgpt/property.json b/agents/addon/extension/openai_chatgpt/property.json deleted file mode 100644 index 9e26dfeeb..000000000 --- a/agents/addon/extension/openai_chatgpt/property.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt/sentence.go b/agents/addon/extension/openai_chatgpt/sentence.go deleted file mode 100644 index e9b9d3104..000000000 --- a/agents/addon/extension/openai_chatgpt/sentence.go +++ /dev/null @@ -1,30 +0,0 @@ -package extension - -func isPunctuation(r rune) bool { - if r == ',' || r == ',' || - r == '.' || r == '。' || - r == '?' || r == '?' || - r == '!' || r == '!' 
{ - return true - } - return false -} - -func parseSentence(sentence, content string) (string, string, bool) { - var remain string - var foundPunc bool - - for _, r := range content { - if !foundPunc { - sentence += string(r) - } else { - remain += string(r) - } - - if !foundPunc && isPunctuation(r) { - foundPunc = true - } - } - - return sentence, remain, foundPunc -} diff --git a/agents/addon/extension/openai_chatgpt/sentence_test.go b/agents/addon/extension/openai_chatgpt/sentence_test.go deleted file mode 100644 index b09fe3076..000000000 --- a/agents/addon/extension/openai_chatgpt/sentence_test.go +++ /dev/null @@ -1,150 +0,0 @@ -package extension - -import ( - "strings" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestIsPunctuation(t *testing.T) { - cases := []struct { - r rune - expect bool - }{ - {',', true}, - {',', true}, - {'.', true}, - {'。', true}, - {'?', true}, - {'?', true}, - {'!', true}, - {'!', true}, - - {'a', false}, - {'0', false}, - } - - for i, c := range cases { - require.Equal(t, c.expect, isPunctuation(c.r), "case %d", i) - } -} - -func TestSplitByPunctuation(t *testing.T) { - cases := []struct { - s string - expect []string - }{ - {"Hello world!", []string{"Hello world"}}, - {"Hey, there!", []string{"Hey", " there"}}, - } - - for i, c := range cases { - out := strings.FieldsFunc(c.s, isPunctuation) - require.Equal(t, c.expect, out, "case %d", i) - } -} - -func TestParseSentence_Should_NoFinalSentence(t *testing.T) { - cases := []struct { - sentence string - content string - - expectSentence string - expectContent string - }{ - { - sentence: "", - content: "", - expectSentence: "", - expectContent: "", - }, - { - sentence: "a", - content: "", - expectSentence: "a", - expectContent: "", - }, - { - sentence: "", - content: "a", - expectSentence: "a", - expectContent: "", - }, - { - sentence: "abc", - content: "ddd", - expectSentence: "abcddd", - expectContent: "", - }, - } - - for i, c := range cases { - sentence, content, final := parseSentence(c.sentence, c.content) - require.False(t, final, "case %d", i) - - require.Equal(t, c.expectSentence, sentence, "case %d", i) - require.Equal(t, c.expectContent, content, "case %d", i) - } -} - -func TestParseSentence_Should_FinalSentence(t *testing.T) { - cases := []struct { - sentence string - content string - - expectSentence string - expectContent string - }{ - { - sentence: "", - content: ",", - expectSentence: ",", - expectContent: "", - }, - { - sentence: "", - content: ",ddd", - expectSentence: ",", - expectContent: "ddd", - }, - { - sentence: "abc", - content: ",ddd", - expectSentence: "abc,", - expectContent: "ddd", - }, - { - sentence: "abc", - content: "dd,d", - expectSentence: "abcdd,", - expectContent: "d", - }, - { - sentence: "abc", - content: "ddd,", - expectSentence: "abcddd,", - expectContent: "", - }, - { - sentence: "abc", - content: "ddd,eee,fff,", - expectSentence: "abcddd,", - expectContent: "eee,fff,", - }, - { - sentence: "我的", - content: "你好,啊!", - expectSentence: "我的你好,", - expectContent: "啊!", - }, - } - - for i, c := range cases { - sentence, content, final := parseSentence(c.sentence, c.content) - require.True(t, final, "case %d", i) - - require.Equal(t, c.expectSentence, sentence, "case %d", i) - require.Equal(t, c.expectContent, content, "case %d", i) - } -} From 004c9ae8784566b04c2347049e7c794dfe760b68 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 9 Jul 2024 08:49:14 +0000 Subject: [PATCH 02/72] feat: empty openai_chatgpt_python and python app --- .gitignore | 3 + 
Makefile | 2 +- agents/.gitignore | 1 + .../addon/extension/azure_tts/manifest.json | 4 +- agents/addon/extension/azure_tts/src/main.cc | 13 ++- .../openai_chatgpt_python/__init__.py | 3 + .../extension/openai_chatgpt_python/main.py | 67 +++++++++++ .../openai_chatgpt_python/manifest.json | 55 +++++++++ .../openai_chatgpt_python/property.json | 1 + .../openai_chatgpt_python/requirements.txt | 1 + agents/main.py | 52 +++++++++ agents/manifest.json.example | 110 ++---------------- agents/scripts/install_deps_and_build.sh | 2 +- agents/scripts/package.sh | 26 ++++- agents/start | 37 ++++++ 15 files changed, 262 insertions(+), 115 deletions(-) create mode 100644 agents/addon/extension/openai_chatgpt_python/__init__.py create mode 100644 agents/addon/extension/openai_chatgpt_python/main.py create mode 100644 agents/addon/extension/openai_chatgpt_python/manifest.json create mode 100644 agents/addon/extension/openai_chatgpt_python/property.json create mode 100644 agents/addon/extension/openai_chatgpt_python/requirements.txt create mode 100644 agents/main.py create mode 100755 agents/start diff --git a/.gitignore b/.gitignore index 4b7cc5a63..b08f1f735 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ crash_context_v1 include/ interface/ lib/ +lib64 agents/manifest.json agents/manifest.elevenlabs.json !agents/addon/manifest.json @@ -28,3 +29,5 @@ xdump_config .vscode/ speechsdk/ SpeechSDK-Linux.tar.gz +pyvenv.cfg +xdump_config \ No newline at end of file diff --git a/Makefile b/Makefile index e8fb73ec1..edec54213 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ build: build-agents build-server build-agents: @echo ">> build agents" - cd agents && ./scripts/install_deps_and_build.sh linux x64 && mv bin/main bin/worker + cd agents && ./scripts/install_deps_and_build.sh linux x64 @echo ">> done" build-server: diff --git a/agents/.gitignore b/agents/.gitignore index 5ab2252d1..460c5c138 100644 --- a/agents/.gitignore +++ b/agents/.gitignore @@ -23,3 +23,4 @@ lib/ session_control.conf.agora xdump_config .vscode +*.pyc diff --git a/agents/addon/extension/azure_tts/manifest.json b/agents/addon/extension/azure_tts/manifest.json index c1f70cf5c..c1b72bb80 100644 --- a/agents/addon/extension/azure_tts/manifest.json +++ b/agents/addon/extension/azure_tts/manifest.json @@ -1,13 +1,13 @@ { "type": "extension", "name": "azure_tts", - "version": "0.1.0", + "version": "0.2.0", "language": "cpp", "dependencies": [ { "type": "system", "name": "rte_runtime", - "version": "0.1.0" + "version": "0.2.0" }, { "type": "system", diff --git a/agents/addon/extension/azure_tts/src/main.cc b/agents/addon/extension/azure_tts/src/main.cc index 61162f8f7..e5afca08a 100644 --- a/agents/addon/extension/azure_tts/src/main.cc +++ b/agents/addon/extension/azure_tts/src/main.cc @@ -14,6 +14,7 @@ #include "log.h" #include "macro/check.h" #include "rte_runtime/binding/cpp/internal/msg/cmd/cmd.h" +#include "rte_runtime/binding/cpp/internal/msg/cmd_result.h" #include "rte_runtime/binding/cpp/internal/msg/pcm_frame.h" #include "rte_runtime/binding/cpp/internal/rte_proxy.h" #include "rte_runtime/binding/cpp/rte.h" @@ -69,13 +70,13 @@ class azure_tts_extension_t : public rte::extension_t { pcm_frame->set_data_fmt(RTE_PCM_FRAME_DATA_FMT_INTERLEAVE); pcm_frame->set_samples_per_channel(samples_per_10ms); pcm_frame->alloc_buf(pcm_frame_size); - rte::buf_t borrowed_buf = pcm_frame->borrow_buf(0); + rte::buf_t borrowed_buf = pcm_frame->lock_buf(0); auto *buf = borrowed_buf.data(); if (buf != nullptr) { memset(buf, 0, pcm_frame_size); // 
fill empty if size is not enough for 10ms memcpy(buf, data, size); } - pcm_frame->give_back_buf(borrowed_buf); + pcm_frame->unlock_buf(borrowed_buf); auto pcm_frame_shared = std::make_shared>(std::move(pcm_frame)); rte_proxy->notify( @@ -106,7 +107,7 @@ class azure_tts_extension_t : public rte::extension_t { // {"name": "flush"} void on_cmd(rte::rte_t &rte, std::unique_ptr cmd) override { - std::string command = cmd->get_msg_name(); + std::string command = cmd->get_name(); AZURE_TTS_LOGI("%s", command.c_str()); if (command == kCmdNameFlush) { @@ -117,12 +118,12 @@ class azure_tts_extension_t : public rte::extension_t { auto ret = rte.send_cmd(rte::cmd_t::create(kCmdNameFlush.c_str())); if (ret != RTE_STATUS_CODE_OK) { AZURE_TTS_LOGE("Failed to send cmd %s, ret:%d", kCmdNameFlush.c_str(), int(ret)); - rte.return_string(RTE_STATUS_CODE_ERROR, "Failed to send cmd", std::move(cmd)); + rte.return_result(rte::cmd_result_t::create(RTE_STATUS_CODE_ERROR), std::move(cmd)); } else { - rte.return_string(RTE_STATUS_CODE_OK, "ok", std::move(cmd)); + rte.return_result(rte::cmd_result_t::create(RTE_STATUS_CODE_OK), std::move(cmd)); } } else { - rte.return_string(RTE_STATUS_CODE_OK, "unregistered cmd", std::move(cmd)); + rte.return_result(rte::cmd_result_t::create(RTE_STATUS_CODE_OK), std::move(cmd)); } } diff --git a/agents/addon/extension/openai_chatgpt_python/__init__.py b/agents/addon/extension/openai_chatgpt_python/__init__.py new file mode 100644 index 000000000..98152ef44 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/__init__.py @@ -0,0 +1,3 @@ +from . import main + +print("openai_chatgpt_python extension loaded") diff --git a/agents/addon/extension/openai_chatgpt_python/main.py b/agents/addon/extension/openai_chatgpt_python/main.py new file mode 100644 index 000000000..118464040 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/main.py @@ -0,0 +1,67 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. 
+#
+#
+from rte_runtime_python import (
+    Addon,
+    Extension,
+    register_addon_as_extension,
+    Rte,
+    Cmd,
+    StatusCode,
+    CmdResult,
+    MetadataInfo,
+    RTE_PIXEL_FMT,
+)
+from rte_runtime_python.image_frame import ImageFrame
+from PIL import Image, ImageFilter
+
+class OpenAIChatGPTExtension(Extension):
+    def on_init(
+        self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo
+    ) -> None:
+        print("OpenAIChatGPTExtension on_init")
+        rte.on_init_done(manifest, property)
+
+    def on_start(self, rte: Rte) -> None:
+        print("OpenAIChatGPTExtension on_start")
+        rte.on_start_done()
+
+    def on_stop(self, rte: Rte) -> None:
+        print("OpenAIChatGPTExtension on_stop")
+        rte.on_stop_done()
+
+    def on_deinit(self, rte: Rte) -> None:
+        print("OpenAIChatGPTExtension on_deinit")
+        rte.on_deinit_done()
+
+    def on_cmd(self, rte: Rte, cmd: Cmd) -> None:
+        print("OpenAIChatGPTExtension on_cmd")
+        cmd_json = cmd.to_json()
+        print("OpenAIChatGPTExtension on_cmd json: " + cmd_json)
+
+        cmd_result = CmdResult.create(StatusCode.OK)
+        cmd_result.set_property_string("detail", "success")
+        rte.return_result(cmd_result, cmd)
+
+    def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None:
+        print("OpenAIChatGPTExtension on_image_frame")
+
+@register_addon_as_extension("openai_chatgpt_python")
+class OpenAIChatGPTExtensionAddon(Addon):
+    def on_init(self, rte: Rte, manifest, property) -> None:
+        print("OpenAIChatGPTExtensionAddon on_init")
+        rte.on_init_done(manifest, property)
+        return
+
+    def on_create_instance(self, rte: Rte, addon_name: str) -> Extension:
+        print("OpenAIChatGPTExtensionAddon on_create_instance")
+        return OpenAIChatGPTExtension(addon_name)
+
+    def on_deinit(self, rte: Rte) -> None:
+        print("OpenAIChatGPTExtensionAddon on_deinit")
+        rte.on_deinit_done()
+        return
diff --git a/agents/addon/extension/openai_chatgpt_python/manifest.json
new file mode 100644
index 000000000..1deea357e
--- /dev/null
+++ b/agents/addon/extension/openai_chatgpt_python/manifest.json
@@ -0,0 +1,55 @@
+{
+  "type": "extension",
+  "name": "openai_chatgpt_python",
+  "version": "0.1.0",
+  "language": "python",
+  "dependencies": [
+    {
+      "type": "system",
+      "name": "rte_runtime_python",
+      "version": "0.2.0"
+    }
+  ],
+  "api": {
+    "property": {
+      "width": {
+        "type": "int32"
+      },
+      "height": {
+        "type": "int32"
+      }
+    },
+    "image_frame_in": [
+      {
+        "name": "image_frame",
+        "property": {
+          "timestamp": {
+            "type": "int64"
+          },
+          "width": {
+            "type": "int32"
+          },
+          "height": {
+            "type": "int32"
+          }
+        }
+      }
+    ],
+    "image_frame_out": [
+      {
+        "name": "image_frame",
+        "property": {
+          "timestamp": {
+            "type": "int64"
+          },
+          "width": {
+            "type": "int32"
+          },
+          "height": {
+            "type": "int32"
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/agents/addon/extension/openai_chatgpt_python/property.json
new file mode 100644
index 000000000..9e26dfeeb
--- /dev/null
+++ b/agents/addon/extension/openai_chatgpt_python/property.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/agents/addon/extension/openai_chatgpt_python/requirements.txt
new file mode 100644
index 000000000..4efc8c760
--- /dev/null
+++ b/agents/addon/extension/openai_chatgpt_python/requirements.txt
@@ -0,0 +1 @@
+pillow==10.4.0
diff --git a/agents/main.py
new file mode 100644
index 000000000..9b8c2908a
--- /dev/null
+++ b/agents/main.py
@@ -0,0 +1,52 @@
+#
+#
+# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +from glob import glob +import importlib.util +import os +from os.path import dirname +from rte_runtime_python import ( + App, +) + + +def log(msg): + print("[PYTHON] {}".format(msg)) + + +class TestApp(App): + def on_init(self, rte, manifest, property): + log("app on_init") + rte.on_init_done(manifest, property) + + def on_deinit(self, rte) -> None: + log("app on_deinit") + rte.on_deinit_done() + + +if __name__ == "__main__": + + basedir = dirname(__file__) + log("app init") + + for module in glob(os.path.join(basedir, "addon/extension/*")): + if os.path.isdir(module): + module_name = os.path.basename(module) + spec = importlib.util.find_spec( + "addon.extension.{}".format(module_name) + ) + if spec is not None: + mod = importlib.import_module( + "addon.extension.{}".format(module_name) + ) + print("imported module: {}".format(module_name)) + + app = TestApp() + log("app created") + + app.run(False) + log("app run done") diff --git a/agents/manifest.json.example b/agents/manifest.json.example index 0130d4e18..d9e71a962 100644 --- a/agents/manifest.json.example +++ b/agents/manifest.json.example @@ -1,38 +1,23 @@ { "type": "app", "name": "astra_agents", - "version": "0.1.0", - "language": "go", + "version": "0.2.0", + "language": "python", "dependencies": [ { "type": "extension_group", "name": "default_extension_group", - "version": "0.1.0" + "version": "0.2.0" }, { "type": "extension", "name": "agora_rtc", - "version": "0.1.1" + "version": "0.2.0-alpha" }, { - "type": "extension", - "name": "chat_transcriber", - "version": "0.1.0" - }, - { - "type": "extension", - "name": "azure_tts", - "version": "0.1.0" - }, - { - "type": "extension", - "name": "interrupt_detector", - "version": "0.1.0" - }, - { - "type": "extension", - "name": "openai_chatgpt", - "version": "0.1.0" + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" } ], "predefined_graphs": [ @@ -62,16 +47,10 @@ "agora_asr_session_control_file_path": "session_control.conf" } }, - { - "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector", - "name": "interrupt_detector" - }, { "type": "extension", "extension_group": "chatgpt", - "addon": "openai_chatgpt", + "addon": "openai_chatgpt_python", "name": "openai_chatgpt", "property": { "base_url": "", @@ -96,12 +75,6 @@ "azure_synthesis_voice_name": "en-US-JaneNeural" } }, - { - "type": "extension", - "extension_group": "transcriber", - "addon": "chat_transcriber", - "name": "chat_transcriber" - }, { "type": "extension_group", "addon": "default_extension_group", @@ -116,11 +89,6 @@ "type": "extension_group", "addon": "default_extension_group", "name": "tts" - }, - { - "type": "extension_group", - "addon": "default_extension_group", - "name": "transcriber" } ], "connections": [ @@ -131,17 +99,9 @@ { "name": "text_data", "dest": [ - { - "extension_group": "default", - "extension": "interrupt_detector" - }, { "extension_group": "chatgpt", "extension": "openai_chatgpt" - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber" } ] } @@ -157,30 +117,6 @@ { "extension_group": "tts", "extension": "azure_tts" - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber", - "cmd_conversions": [ - { - "cmd": { - "type": "per_property", - "keep_original": true, - "rules": [ - { - "path": "is_final", - "type": "fixed_value", - "value": "bool(true)" - }, - { - "path": "stream_id", - "type": 
"fixed_value", - "value": "uint32(999)" - } - ] - } - } - ] } ] } @@ -222,36 +158,6 @@ ] } ] - }, - { - "extension_group": "transcriber", - "extension": "chat_transcriber", - "data": [ - { - "name": "data", - "dest": [ - { - "extension_group": "default", - "extension": "agora_rtc" - } - ] - } - ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - } - ] - } - ] } ] } diff --git a/agents/scripts/install_deps_and_build.sh b/agents/scripts/install_deps_and_build.sh index 1b0e5acd4..9c0de1221 100755 --- a/agents/scripts/install_deps_and_build.sh +++ b/agents/scripts/install_deps_and_build.sh @@ -85,7 +85,7 @@ main() { # build addons and app build_cxx_addon $APP_HOME - build_go_app $APP_HOME + # build_go_app $APP_HOME } main "$@" diff --git a/agents/scripts/package.sh b/agents/scripts/package.sh index ba87f7f30..05f10a215 100755 --- a/agents/scripts/package.sh +++ b/agents/scripts/package.sh @@ -17,6 +17,13 @@ copy_extension() { if [[ -f addon/extension/$extension/manifest.json ]]; then cp addon/extension/$extension/manifest.json .release/addon/extension/$extension/ + + # package .py for python extensions + EXTENSION_LANGUAGE=$(jq -r '.language' addon/extension/$extension/manifest.json) + if [[ $EXTENSION_LANGUAGE == "python" ]]; then + # TODO: package 'publish' contents only + cp addon/extension/$extension/*.py .release/addon/extension/$extension/ + fi fi if [[ -f addon/extension/$extension/property.json ]]; then @@ -30,14 +37,27 @@ cp manifest.json .release cp manifest.elevenlabs.json .release cp property.json .release -mkdir .release/addon +# python main and deps +if [[ -f main.py ]]; then + cp main.py .release +fi +if [[ -d interface/rte_runtime_python ]]; then + mkdir -p .release/interface + cp -r interface/rte_runtime_python .release/interface +fi + +# extension group +mkdir -p .release/addon cp -r addon/extension_group .release/addon/ -cp -r session_control.conf .release/ +# extensions mkdir -p .release/addon/extension - for extension in addon/extension/* do extension_name=$(basename $extension) copy_extension $extension_name done + +if [[ -f session_control.conf ]]; then + cp -r session_control.conf .release/ +fi diff --git a/agents/start b/agents/start new file mode 100755 index 000000000..dfc254ecc --- /dev/null +++ b/agents/start @@ -0,0 +1,37 @@ +#!/bin/bash + +set -e + +cd "$(dirname "${BASH_SOURCE[0]}")/.." + +python3 -m venv . +cd bin +source activate +cd .. + +if [[ -f "requirements.txt" ]]; then + pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ -r requirements.txt +fi + +# traverse the addon/extension directory to find the requirements.txt +if [[ -d "addon/extension" ]]; then + for extension in addon/extension/*; do + if [[ -f "$extension/requirements.txt" ]]; then + pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ -r $extension/requirements.txt + fi + done +fi + +# if [[ -f "lib/libclang_rt.asan_osx_dynamic.dylib" ]]; then +# export DYLD_INSERT_LIBRARIES=lib/libclang_rt.asan_osx_dynamic.dylib +# fi + +# if [[ -f "lib/libasan.so" ]]; then +# export LD_PRELOAD=lib/libasan.so +# fi + +export PYTHONPATH=lib:interface +export RTE_HOME=. 
+# export PYTHONMALLOC=malloc + +python3 main.py From 02ae2e764e62aa639c76eebe7201df220cff685b Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 9 Jul 2024 09:36:14 +0000 Subject: [PATCH 03/72] feat: workaround for drop pcm frame --- agents/manifest.json.example | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/agents/manifest.json.example b/agents/manifest.json.example index d9e71a962..964f0dd04 100644 --- a/agents/manifest.json.example +++ b/agents/manifest.json.example @@ -105,6 +105,17 @@ } ] } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } ] }, { From 0e870890d602e014daae81a681a162cd4f9cea3e Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 9 Jul 2024 09:36:28 +0000 Subject: [PATCH 04/72] fix: rte_proxy release --- agents/addon/extension/azure_tts/src/main.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/agents/addon/extension/azure_tts/src/main.cc b/agents/addon/extension/azure_tts/src/main.cc index e5afca08a..5e57bbe59 100644 --- a/agents/addon/extension/azure_tts/src/main.cc +++ b/agents/addon/extension/azure_tts/src/main.cc @@ -152,6 +152,7 @@ class azure_tts_extension_t : public rte::extension_t { azure_tts_->Stop(); azure_tts_ = nullptr; } + rte_proxy_.reset(); // Extension stop. rte.on_stop_done(); From 1ffc534762ad2a8177567f0e430378cad84e7ac8 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 9 Jul 2024 09:41:04 +0000 Subject: [PATCH 05/72] fix: stop on wait --- agents/addon/extension/azure_tts/src/tts.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/addon/extension/azure_tts/src/tts.cc b/agents/addon/extension/azure_tts/src/tts.cc index 4bcd1ce7e..ea5a2919d 100644 --- a/agents/addon/extension/azure_tts/src/tts.cc +++ b/agents/addon/extension/azure_tts/src/tts.cc @@ -36,7 +36,7 @@ bool AzureTTS::Start() { { std::unique_lock lk(tasks_mutex_); - tasks_cv_.wait(lk, [this]() { return !tasks_.empty(); }); + tasks_cv_.wait(lk); if (tasks_.empty()) { continue; } From 0743c7cc007a1b25e8d45f8a19fe4cf750319b50 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 9 Jul 2024 10:25:31 +0000 Subject: [PATCH 06/72] feat: repath --- agents/{ => bin}/start | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename agents/{ => bin}/start (100%) diff --git a/agents/start b/agents/bin/start similarity index 100% rename from agents/start rename to agents/bin/start From 1a9299f86ec110ce858d76b3689964d7857ab202 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 9 Jul 2024 10:26:28 +0000 Subject: [PATCH 07/72] fix: remove proxy --- agents/bin/start | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agents/bin/start b/agents/bin/start index dfc254ecc..dbf68737f 100755 --- a/agents/bin/start +++ b/agents/bin/start @@ -10,14 +10,14 @@ source activate cd .. 
if [[ -f "requirements.txt" ]]; then - pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ -r requirements.txt + pip install -r requirements.txt fi # traverse the addon/extension directory to find the requirements.txt if [[ -d "addon/extension" ]]; then for extension in addon/extension/*; do if [[ -f "$extension/requirements.txt" ]]; then - pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ -r $extension/requirements.txt + pip install -r $extension/requirements.txt fi done fi From 6cec4561a1e1a15e4b7a30f81aabe5fe17e9ab62 Mon Sep 17 00:00:00 2001 From: sunshinexcode <24xinhui@163.com> Date: Fri, 12 Jul 2024 15:03:37 +0800 Subject: [PATCH 08/72] Feature/python expermential (#92) * feat: add interrupt detector python extension * feat(): add chat transcriber python extension * feat(): add elevenlabs tts python extension * chore(): modify comment * chore(): modify log --- .../chat_transcriber_python/__init__.py | 4 + .../chat_transcriber_extension.py | 161 ++++++++++ .../extension/chat_transcriber_python/log.py | 13 + .../chat_transcriber_python/manifest.json | 40 +++ .../pb/chat_text.proto | 37 +++ .../pb/chat_text_pb2.py | 41 +++ .../chat_transcriber_python/property.json | 1 + .../chat_transcriber_python/requirements.txt | 1 + .../elevenlabs_tts_python/__init__.py | 5 + .../elevenlabs_tts_python/elevenlabs_tts.py | 67 ++++ .../elevenlabs_tts_extension.py | 294 ++++++++++++++++++ .../extension/elevenlabs_tts_python/log.py | 13 + .../elevenlabs_tts_python/manifest.json | 69 ++++ .../extension/elevenlabs_tts_python/pcm.py | 74 +++++ .../elevenlabs_tts_python/property.json | 1 + .../elevenlabs_tts_python/requirements.txt | 1 + .../interrupt_detector_python/__init__.py | 4 + .../interrupt_detector_extension.py | 104 +++++++ .../interrupt_detector_python/log.py | 13 + .../interrupt_detector_python/manifest.json | 33 ++ .../interrupt_detector_python/property.json | 1 + .../extension/openai_chatgpt_python/main.py | 12 +- 22 files changed, 983 insertions(+), 6 deletions(-) create mode 100644 agents/addon/extension/chat_transcriber_python/__init__.py create mode 100644 agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py create mode 100644 agents/addon/extension/chat_transcriber_python/log.py create mode 100644 agents/addon/extension/chat_transcriber_python/manifest.json create mode 100644 agents/addon/extension/chat_transcriber_python/pb/chat_text.proto create mode 100644 agents/addon/extension/chat_transcriber_python/pb/chat_text_pb2.py create mode 100644 agents/addon/extension/chat_transcriber_python/property.json create mode 100644 agents/addon/extension/chat_transcriber_python/requirements.txt create mode 100644 agents/addon/extension/elevenlabs_tts_python/__init__.py create mode 100644 agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py create mode 100644 agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py create mode 100644 agents/addon/extension/elevenlabs_tts_python/log.py create mode 100644 agents/addon/extension/elevenlabs_tts_python/manifest.json create mode 100644 agents/addon/extension/elevenlabs_tts_python/pcm.py create mode 100644 agents/addon/extension/elevenlabs_tts_python/property.json create mode 100644 agents/addon/extension/elevenlabs_tts_python/requirements.txt create mode 100644 agents/addon/extension/interrupt_detector_python/__init__.py create mode 100644 agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py create mode 100644 agents/addon/extension/interrupt_detector_python/log.py 
create mode 100644 agents/addon/extension/interrupt_detector_python/manifest.json
create mode 100644 agents/addon/extension/interrupt_detector_python/property.json
diff --git a/agents/addon/extension/chat_transcriber_python/__init__.py
new file mode 100644
index 000000000..f8689834c
--- /dev/null
+++ b/agents/addon/extension/chat_transcriber_python/__init__.py
@@ -0,0 +1,4 @@
+from . import chat_transcriber_extension
+from .log import logger
+
+logger.info("chat_transcriber_python extension loaded")
diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py
new file mode 100644
index 000000000..59f92c2c2
--- /dev/null
+++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py
@@ -0,0 +1,161 @@
+#
+#
+# Agora Real Time Engagement
+# Created by XinHui Li in 2024-07.
+# Copyright (c) 2024 Agora IO. All rights reserved.
+#
+#
+
+from rte_runtime_python import (
+    Addon,
+    Extension,
+    register_addon_as_extension,
+    Rte,
+    Cmd,
+    Data,
+    StatusCode,
+    CmdResult,
+    MetadataInfo,
+)
+import time
+from .pb import chat_text_pb2 as pb
+from .log import logger
+
+CMD_NAME_FLUSH = "flush"
+
+TEXT_DATA_TEXT_FIELD = "text"
+TEXT_DATA_FINAL_FIELD = "is_final"
+TEXT_DATA_STREAM_ID_FIELD = "stream_id"
+TEXT_DATA_END_OF_SEGMENT_FIELD = "end_of_segment"
+
+# record the cached text data for each stream id
+cached_text_map = {}
+
+
+class ChatTranscriberExtension(Extension):
+    def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None:
+        logger.info("on_init")
+        rte.on_init_done(manifest, property)
+
+    def on_start(self, rte: Rte) -> None:
+        logger.info("on_start")
+        rte.on_start_done()
+
+    def on_stop(self, rte: Rte) -> None:
+        logger.info("on_stop")
+        rte.on_stop_done()
+
+    def on_deinit(self, rte: Rte) -> None:
+        logger.info("on_deinit")
+        rte.on_deinit_done()
+
+    def on_cmd(self, rte: Rte, cmd: Cmd) -> None:
+        logger.info("on_cmd")
+        cmd_json = cmd.to_json()
+        logger.info("on_cmd json: %s", cmd_json)
+
+        cmd_result = CmdResult.create(StatusCode.OK)
+        cmd_result.set_property_string("detail", "success")
+        rte.return_result(cmd_result, cmd)
+
+    def on_data(self, rte: Rte, data: Data) -> None:
+        """
+        on_data receives data from rte graph.
+ current supported data: + - name: text_data + example: + {"name": "text_data", "properties": {"text": "hello", "is_final": true, "stream_id": 123, "end_of_segment": true}} + """ + try: + text = data.get_property_string(TEXT_DATA_TEXT_FIELD) + except Exception as e: + logger.warning( + f"on_data get_property_string {TEXT_DATA_TEXT_FIELD} error: {e}" + ) + return + + try: + final = data.get_property_bool(TEXT_DATA_FINAL_FIELD) + except Exception as e: + logger.warning( + f"on_data get_property_bool {TEXT_DATA_FINAL_FIELD} error: {e}" + ) + return + + try: + stream_id = data.get_property_int(TEXT_DATA_STREAM_ID_FIELD) + except Exception as e: + logger.warning( + f"on_data get_property_int {TEXT_DATA_STREAM_ID_FIELD} error: {e}" + ) + return + + try: + end_of_segment = data.get_property_bool(TEXT_DATA_END_OF_SEGMENT_FIELD) + except Exception as e: + logger.warning( + f"on_data get_property_bool {TEXT_DATA_END_OF_SEGMENT_FIELD} error: {e}" + ) + return + + logger.debug( + f"on_data {TEXT_DATA_TEXT_FIELD}: {text} {TEXT_DATA_FINAL_FIELD}: {final} {TEXT_DATA_STREAM_ID_FIELD}: {stream_id} {TEXT_DATA_END_OF_SEGMENT_FIELD}: {end_of_segment}" + ) + + # We cache all final text data and append the non-final text data to the cached data + # until the end of the segment. + if end_of_segment: + if stream_id in cached_text_map: + text = cached_text_map[stream_id] + text + del cached_text_map[stream_id] + else: + if final: + if stream_id in cached_text_map: + text = cached_text_map[stream_id] + text + + cached_text_map[stream_id] = text + + pb_text = pb.Text( + uid=stream_id, + data_type="transcribe", + texttime=int(time.time() * 1000), # Convert to milliseconds + words=[ + pb.Word( + text=text, + is_final=end_of_segment, + ), + ], + ) + + try: + pb_serialized_text = pb_text.SerializeToString() + except Exception as e: + logger.warning(f"on_data SerializeToString error: {e}") + return + + try: + # convert the origin text data to the protobuf data and send it to the graph. 
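+            # The serialized bytes ride in a generic "data" message, so any
+            # consumer wired to this extension's data_out in the graph can
+            # decode them with the same chat_text schema.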
+ rte_data = data.create("data") + rte_data.set_property_string("data", pb_serialized_text) + except Exception as e: + logger.warning(f"on_data new_data error: {e}") + return + + rte.send_data(rte_data) + + +@register_addon_as_extension("chat_transcriber_python") +class ChatTranscriberExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + rte.on_create_instance_done(ChatTranscriberExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/chat_transcriber_python/log.py b/agents/addon/extension/chat_transcriber_python/log.py new file mode 100644 index 000000000..e30358c11 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("chat_transcriber_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/chat_transcriber_python/manifest.json b/agents/addon/extension/chat_transcriber_python/manifest.json new file mode 100644 index 000000000..c171b4466 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/manifest.json @@ -0,0 +1,40 @@ +{ + "type": "extension", + "name": "chat_transcriber_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "api": { + "property": {}, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + }, + "stream_id": { + "type": "uint32" + }, + "end_of_segment": { + "type": "bool" + } + } + } + ], + "data_out": [ + { + "name": "data" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber_python/pb/chat_text.proto b/agents/addon/extension/chat_transcriber_python/pb/chat_text.proto new file mode 100644 index 000000000..c770eb494 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/pb/chat_text.proto @@ -0,0 +1,37 @@ +syntax = "proto3"; + +package agora.chat_transcriber_python; +option go_package = ".;pb"; + +message Text { + int32 vendor = 1; + int32 version = 2; + int32 seqnum = 3; + int32 uid = 4; + int32 flag = 5; + int64 time = 6; // final time =first nofinal time + int32 lang = 7; + int32 starttime = 8; + int32 offtime = 9; + repeated Word words = 10; + bool end_of_segment = 11; + int32 duration_ms = 12; + string data_type = 13; // transcribe ,translate + repeated Translation trans = 14; + string culture = 15; + int64 texttime = 16; // pkg timestamp +} + +message Word { + string text = 1; + int32 start_ms = 2; + int32 duration_ms = 3; + bool is_final = 4; + double confidence = 5; +} + +message Translation { + bool is_final = 1; + string lang = 2; + repeated string texts = 3; +} \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber_python/pb/chat_text_pb2.py b/agents/addon/extension/chat_transcriber_python/pb/chat_text_pb2.py new file mode 100644 index 000000000..17142fc89 --- /dev/null +++ 
b/agents/addon/extension/chat_transcriber_python/pb/chat_text_pb2.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# NO CHECKED-IN PROTOBUF GENCODE +# source: chat_text.proto +# Protobuf Python Version: 5.27.2 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 5, + 27, + 2, + '', + 'chat_text.proto' +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0f\x63hat_text.proto\x12\x1d\x61gora.chat_transcriber_python\"\xe4\x02\n\x04Text\x12\x0e\n\x06vendor\x18\x01 \x01(\x05\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12\x0e\n\x06seqnum\x18\x03 \x01(\x05\x12\x0b\n\x03uid\x18\x04 \x01(\x05\x12\x0c\n\x04\x66lag\x18\x05 \x01(\x05\x12\x0c\n\x04time\x18\x06 \x01(\x03\x12\x0c\n\x04lang\x18\x07 \x01(\x05\x12\x11\n\tstarttime\x18\x08 \x01(\x05\x12\x0f\n\x07offtime\x18\t \x01(\x05\x12\x32\n\x05words\x18\n \x03(\x0b\x32#.agora.chat_transcriber_python.Word\x12\x16\n\x0e\x65nd_of_segment\x18\x0b \x01(\x08\x12\x13\n\x0b\x64uration_ms\x18\x0c \x01(\x05\x12\x11\n\tdata_type\x18\r \x01(\t\x12\x39\n\x05trans\x18\x0e \x03(\x0b\x32*.agora.chat_transcriber_python.Translation\x12\x0f\n\x07\x63ulture\x18\x0f \x01(\t\x12\x10\n\x08texttime\x18\x10 \x01(\x03\"a\n\x04Word\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x10\n\x08start_ms\x18\x02 \x01(\x05\x12\x13\n\x0b\x64uration_ms\x18\x03 \x01(\x05\x12\x10\n\x08is_final\x18\x04 \x01(\x08\x12\x12\n\nconfidence\x18\x05 \x01(\x01\"<\n\x0bTranslation\x12\x10\n\x08is_final\x18\x01 \x01(\x08\x12\x0c\n\x04lang\x18\x02 \x01(\t\x12\r\n\x05texts\x18\x03 \x03(\tB\x06Z\x04.;pbb\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'chat_text_pb2', _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals['DESCRIPTOR']._loaded_options = None + _globals['DESCRIPTOR']._serialized_options = b'Z\004.;pb' + _globals['_TEXT']._serialized_start=51 + _globals['_TEXT']._serialized_end=407 + _globals['_WORD']._serialized_start=409 + _globals['_WORD']._serialized_end=506 + _globals['_TRANSLATION']._serialized_start=508 + _globals['_TRANSLATION']._serialized_end=568 +# @@protoc_insertion_point(module_scope) diff --git a/agents/addon/extension/chat_transcriber_python/property.json b/agents/addon/extension/chat_transcriber_python/property.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber_python/requirements.txt b/agents/addon/extension/chat_transcriber_python/requirements.txt new file mode 100644 index 000000000..a4a4a4d8d --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/requirements.txt @@ -0,0 +1 @@ +protobuf==5.27.2 diff --git a/agents/addon/extension/elevenlabs_tts_python/__init__.py b/agents/addon/extension/elevenlabs_tts_python/__init__.py new file mode 100644 index 000000000..f80e230ad --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/__init__.py @@ -0,0 +1,5 @@ 
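+# Importing the extension module here is what triggers its
+# @register_addon_as_extension decorator when the app scans addon packages.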
+from . import elevenlabs_tts_extension +from .log import logger + + +logger.info("elevenlabs_tts_python extension loaded") diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py new file mode 100644 index 000000000..8f42e6d5f --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py @@ -0,0 +1,67 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# + +from typing import Iterator +from elevenlabs import Voice, VoiceSettings +from elevenlabs.client import ElevenLabs + + +class ElevenlabsTTSConfig: + def __init__( + self, + api_key="", + model_id="eleven_multilingual_v2", + optimize_streaming_latency=0, + request_timeout_seconds=30, + similarity_boost=0.75, + speaker_boost=False, + stability=0.5, + style=0.0, + voice_id="pNInz6obpgDQGcFmaJgB", + ) -> None: + self.api_key = api_key + self.model_id = model_id + self.optimize_streaming_latency = optimize_streaming_latency + self.request_timeout_seconds = request_timeout_seconds + self.similarity_boost = similarity_boost + self.speaker_boost = speaker_boost + self.stability = stability + self.style = style + self.voice_id = voice_id + + +def default_elevenlabs_tts_config() -> ElevenlabsTTSConfig: + return ElevenlabsTTSConfig() + + +class ElevenlabsTTS: + def __init__(self, config) -> None: + self.config = config + self.client = ElevenLabs( + api_key=config.api_key, timeout=config.request_timeout_seconds + ) + + def text_to_speech_stream(self, text) -> Iterator[bytes]: + audio_stream = self.client.generate( + text=text, + model=self.config.model_id, + optimize_streaming_latency=self.config.optimize_streaming_latency, + output_format="pcm_16000", + stream=True, + voice=Voice( + voice_id=self.config.voice_id, + settings=VoiceSettings( + stability=self.config.stability, + similarity_boost=self.config.similarity_boost, + style=self.config.style, + speaker_boost=self.config.speaker_boost, + ), + ), + ) + + return audio_stream diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py new file mode 100644 index 000000000..d2e94b294 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py @@ -0,0 +1,294 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. 
+# +# + +import queue +import threading +import time + +from rte_runtime_python import ( + Addon, + Extension, + register_addon_as_extension, + Rte, + Cmd, + CmdResult, + StatusCode, + Data, + MetadataInfo, +) +from .elevenlabs_tts import default_elevenlabs_tts_config, ElevenlabsTTS +from .pcm import PcmConfig, Pcm +from .log import logger + +CMD_IN_FLUSH = "flush" +CMD_OUT_FLUSH = "flush" + +DATA_IN_TEXT_DATA_PROPERTY_TEXT = "text" + +PROPERTY_API_KEY = "api_key" # Required +PROPERTY_MODEL_ID = "model_id" # Optional +PROPERTY_OPTIMIZE_STREAMING_LATENCY = "optimize_streaming_latency" # Optional +PROPERTY_REQUEST_TIMEOUT_SECONDS = "request_timeout_seconds" # Optional +PROPERTY_SIMILARITY_BOOST = "similarity_boost" # Optional +PROPERTY_SPEAKER_BOOST = "speaker_boost" # Optional +PROPERTY_STABILITY = "stability" # Optional +PROPERTY_STYLE = "style" # Optional + + +class Message: + def __init__(self, text: str, received_ts: int) -> None: + self.text = text + self.received_ts = received_ts + + +class ElevenlabsTTSExtension(Extension): + def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + logger.info("on_init") + + self.elevenlabs_tts = None + self.outdate_ts = 0 + self.pcm = None + self.pcm_frame_size = 0 + self.text_queue = queue.Queue(maxsize=1024) + + rte.on_init_done(manifest, property) + + def on_start(self, rte: Rte) -> None: + logger.info("on_start") + + # prepare configuration + elevenlabs_tts_config = default_elevenlabs_tts_config() + + try: + elevenlabs_tts_config.api_key = rte.get_property_string(PROPERTY_API_KEY) + except Exception as e: + logger.warning( + f"on_start get_property_string {PROPERTY_API_KEY} error: {e}" + ) + return + + try: + model_id = rte.get_property_string(PROPERTY_MODEL_ID) + if len(model_id) > 0: + elevenlabs_tts_config.model_id = model_id + except Exception as e: + logger.warning( + f"on_start get_property_string {PROPERTY_MODEL_ID} error: {e}" + ) + + try: + optimize_streaming_latency = rte.get_property_int( + PROPERTY_OPTIMIZE_STREAMING_LATENCY + ) + if optimize_streaming_latency > 0: + elevenlabs_tts_config.optimize_streaming_latency = ( + optimize_streaming_latency + ) + except Exception as e: + logger.warning( + f"on_start get_property_int {PROPERTY_OPTIMIZE_STREAMING_LATENCY} error: {e}" + ) + + try: + request_timeout_seconds = rte.get_property_int( + PROPERTY_REQUEST_TIMEOUT_SECONDS + ) + if request_timeout_seconds > 0: + elevenlabs_tts_config.request_timeout_seconds = request_timeout_seconds + except Exception as e: + logger.warning( + f"on_start get_property_int {PROPERTY_REQUEST_TIMEOUT_SECONDS} error: {e}" + ) + + try: + elevenlabs_tts_config.similarity_boost = rte.get_property_float( + PROPERTY_SIMILARITY_BOOST + ) + except Exception as e: + logger.warning( + f"on_start get_property_float {PROPERTY_SIMILARITY_BOOST} error: {e}" + ) + + try: + elevenlabs_tts_config.speaker_boost = rte.get_property_bool( + PROPERTY_SPEAKER_BOOST + ) + except Exception as e: + logger.warning( + f"on_start get_property_bool {PROPERTY_SPEAKER_BOOST} error: {e}" + ) + + try: + elevenlabs_tts_config.stability = rte.get_property_float(PROPERTY_STABILITY) + except Exception as e: + logger.warning( + f"on_start get_property_float {PROPERTY_STABILITY} error: {e}" + ) + + try: + elevenlabs_tts_config.style = rte.get_property_float(PROPERTY_STYLE) + except Exception as e: + logger.warning(f"on_start get_property_float {PROPERTY_STYLE} error: {e}") + + # create elevenlabsTTS instance + self.elevenlabs_tts = ElevenlabsTTS(elevenlabs_tts_config) 
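+        # Any lookup that failed above kept its default from
+        # default_elevenlabs_tts_config(), so api_key is the only setting
+        # that must come from the graph configuration.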
+
+        logger.info(
+            f"ElevenlabsTTS succeeded with model_id: {self.elevenlabs_tts.config.model_id}, VoiceId: {self.elevenlabs_tts.config.voice_id}"
+        )
+
+        # create pcm instance
+        self.pcm = Pcm(PcmConfig())
+        self.pcm_frame_size = self.pcm.get_pcm_frame_size()
+
+        threading.Thread(target=self.process_text_queue, args=(rte,)).start()
+
+        rte.on_start_done()
+
+    def on_stop(self, rte: Rte) -> None:
+        logger.info("on_stop")
+        rte.on_stop_done()
+
+    def on_deinit(self, rte: Rte) -> None:
+        logger.info("on_deinit")
+        rte.on_deinit_done()
+
+    def on_cmd(self, rte: Rte, cmd: Cmd) -> None:
+        """
+        on_cmd receives cmd from rte graph.
+        current supported cmd:
+          - name: flush
+            example:
+            {"name": "flush"}
+        """
+        logger.info("on_cmd")
+        cmd_name = cmd.get_name()
+
+        logger.info(f"on_cmd [{cmd_name}]")
+
+        if cmd_name == CMD_IN_FLUSH:
+            self.outdate_ts = int(time.time() * 1000000)
+
+            # send out
+            out_cmd = Cmd.create(CMD_OUT_FLUSH)
+            rte.send_cmd(out_cmd)
+
+        cmd_result = CmdResult.create(StatusCode.OK)
+        cmd_result.set_property_string("detail", "success")
+        rte.return_result(cmd_result, cmd)
+
+    def on_data(self, rte: Rte, data: Data) -> None:
+        """
+        on_data receives data from rte graph.
+        current supported data:
+          - name: text_data
+            example:
+            {name: text_data, properties: {text: "hello"}}
+        """
+        logger.info("on_data")
+
+        try:
+            text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT)
+        except Exception as e:
+            logger.warning(
+                f"on_data get_property_string {DATA_IN_TEXT_DATA_PROPERTY_TEXT} error: {e}"
+            )
+            return
+
+        if len(text) == 0:
+            logger.debug("on_data text is empty, ignored")
+            return
+
+        logger.info(f"OnData input text: [{text}]")
+
+        self.text_queue.put(Message(text, int(time.time() * 1000000)))
+
+    def process_text_queue(self, rte: Rte):
+        logger.info("process_text_queue")
+
+        while True:
+            msg = self.text_queue.get()
+            if msg.received_ts < self.outdate_ts:
+                logger.info(
+                    f"textChan interrupt and flushing for input text: [{msg.text}], received_ts: {msg.received_ts}, outdate_ts: {self.outdate_ts}"
+                )
+                continue
+
+            start_time = time.time()
+            buf = self.pcm.new_buf()
+            first_frame_latency = 0
+            n = 0
+            pcm_frame_read = 0
+            read_bytes = 0
+            sent_frames = 0
+
+            audio_stream = self.elevenlabs_tts.text_to_speech_stream(msg.text)
+
+            for chunk in self.pcm.read_pcm_stream(audio_stream, self.pcm_frame_size):
+                if msg.received_ts < self.outdate_ts:
+                    logger.info(
+                        f"textChan interrupt and flushing for input text: [{msg.text}], received_ts: {msg.received_ts}, outdate_ts: {self.outdate_ts}"
+                    )
+                    break
+
+                if not chunk:
+                    logger.info("read pcm stream EOF")
+                    break
+
+                n = len(chunk)
+                read_bytes += n
+                # accumulate the chunk into the current frame buffer
+                buf[pcm_frame_read : pcm_frame_read + n] = chunk
+                pcm_frame_read += n
+
+                if pcm_frame_read != self.pcm.get_pcm_frame_size():
+                    logger.debug(
+                        f"the number of bytes read is [{pcm_frame_read}] inconsistent with pcm frame size",
+                    )
+                    continue
+
+                self.pcm.send(rte, buf)
+                buf = self.pcm.new_buf()
+                pcm_frame_read = 0
+                sent_frames += 1
+
+                if first_frame_latency == 0:
+                    first_frame_latency = int((time.time() - start_time) * 1000)
+                    logger.info(
+                        f"first frame available for text: [{msg.text}], received_ts: {msg.received_ts}, first_frame_latency: {first_frame_latency}ms",
+                    )
+
+                logger.debug(f"sending pcm data, text: [{msg.text}]")
+
+            if pcm_frame_read > 0:
+                self.pcm.send(rte, buf)
+                sent_frames += 1
+                logger.info(
+                    f"sending pcm remain data, text: [{msg.text}], pcm_frame_read: {pcm_frame_read}"
+                )
+
+            finish_latency = int((time.time() - start_time) * 1000)
+            logger.info(
+                f"send pcm data finished, text: [{msg.text}], received_ts: 
{msg.received_ts}, read_bytes: {read_bytes}, sent_frames: {sent_frames}, \ + first_frame_latency: {first_frame_latency}ms, finish_latency: {finish_latency}ms" + ) + + +@register_addon_as_extension("elevenlabs_tts_python") +class ElevenlabsTTSExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + rte.on_create_instance_done(ElevenlabsTTSExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/elevenlabs_tts_python/log.py b/agents/addon/extension/elevenlabs_tts_python/log.py new file mode 100644 index 000000000..54f870f34 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("elevenlabs_tts_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/elevenlabs_tts_python/manifest.json b/agents/addon/extension/elevenlabs_tts_python/manifest.json new file mode 100644 index 000000000..35b49e5ca --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/manifest.json @@ -0,0 +1,69 @@ +{ + "type": "extension", + "name": "elevenlabs_tts_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "model_id": { + "type": "string" + }, + "request_timeout_seconds": { + "type": "int64" + }, + "similarity_boost": { + "type": "float64" + }, + "speaker_boost": { + "type": "bool" + }, + "stability": { + "type": "float64" + }, + "style": { + "type": "float64" + }, + "optimize_streaming_latency": { + "type": "int64" + }, + "voice_id": { + "type": "string" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ], + "pcm_frame_out": [ + { + "name": "pcm_frame" + } + ] + } +} diff --git a/agents/addon/extension/elevenlabs_tts_python/pcm.py b/agents/addon/extension/elevenlabs_tts_python/pcm.py new file mode 100644 index 000000000..eeb33c015 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/pcm.py @@ -0,0 +1,74 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. 
+#
+#
+
+import logging
+from typing import Iterator
+from rte_runtime_python import PcmFrame, Rte, RTE_PCM_FRAME_DATA_FMT
+
+
+class Pcm:
+    def __init__(self, config) -> None:
+        self.config = config
+
+    def get_pcm_frame(self, buf: memoryview) -> PcmFrame:
+        frame = PcmFrame.create(self.config.name)
+        frame.set_bytes_per_sample(self.config.bytes_per_sample)
+        frame.set_sample_rate(self.config.sample_rate)
+        frame.set_number_of_channels(self.config.channel)  # PcmConfig names this field "channel"
+        frame.set_timestamp(self.config.timestamp)
+        frame.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_NON_INTERLEAVE)
+        frame.set_samples_per_channel(
+            self.config.samples_per_channel // self.config.channel
+        )
+
+        frame.alloc_buf(self.get_pcm_frame_size())
+        frame_buf = frame.lock_buf()
+        # copy data
+        frame_buf[:] = buf
+        frame.unlock_buf(frame_buf)
+
+        return frame
+
+    def get_pcm_frame_size(self) -> int:
+        return (
+            self.config.samples_per_channel
+            * self.config.channel
+            * self.config.bytes_per_sample
+        )
+
+    def new_buf(self) -> bytearray:
+        return bytearray(self.get_pcm_frame_size())
+
+    def read_pcm_stream(
+        self, stream: Iterator[bytes], chunk_size: int
+    ) -> Iterator[bytes]:
+        chunk = b""
+        for data in stream:
+            chunk += data
+            while len(chunk) >= chunk_size:
+                yield chunk[:chunk_size]
+                chunk = chunk[chunk_size:]
+        if chunk:
+            yield chunk
+
+    def send(self, rte: Rte, buf: memoryview) -> None:
+        try:
+            frame = self.get_pcm_frame(buf)
+            rte.send_pcm_frame(frame)
+        except Exception as e:
+            logging.error(f"send frame failed, {e}")
+
+
+class PcmConfig:
+    def __init__(self) -> None:
+        self.bytes_per_sample = 2
+        self.channel = 1
+        self.name = "pcm_frame"
+        self.sample_rate = 16000
+        self.samples_per_channel = 16000 // 100
+        self.timestamp = 0
diff --git a/agents/addon/extension/elevenlabs_tts_python/property.json b/agents/addon/extension/elevenlabs_tts_python/property.json
new file mode 100644
index 000000000..9e26dfeeb
--- /dev/null
+++ b/agents/addon/extension/elevenlabs_tts_python/property.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/agents/addon/extension/elevenlabs_tts_python/requirements.txt b/agents/addon/extension/elevenlabs_tts_python/requirements.txt
new file mode 100644
index 000000000..3dff4c023
--- /dev/null
+++ b/agents/addon/extension/elevenlabs_tts_python/requirements.txt
@@ -0,0 +1 @@
+elevenlabs==1.4.1
diff --git a/agents/addon/extension/interrupt_detector_python/__init__.py b/agents/addon/extension/interrupt_detector_python/__init__.py
new file mode 100644
index 000000000..a3ec245c3
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/__init__.py
@@ -0,0 +1,4 @@
+from . import interrupt_detector_extension
+from .log import logger
+
+logger.info("interrupt_detector_python extension loaded")
diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py
new file mode 100644
index 000000000..ac55f0fda
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py
@@ -0,0 +1,104 @@
+#
+#
+# Agora Real Time Engagement
+# Created by XinHui Li in 2024-07.
+# Copyright (c) 2024 Agora IO. All rights reserved.
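Pcm.read_pcm_stream above is a plain rechunking generator: it buffers whatever chunk sizes the HTTP audio stream happens to yield and re-slices them into fixed-size PCM frames, emitting at most one trailing partial chunk at EOF. A standalone sketch of the same logic, independent of rte_runtime_python so it can be run directly (the rechunk name and the demo input are illustrative):

from typing import Iterator

def rechunk(stream: Iterator[bytes], chunk_size: int) -> Iterator[bytes]:
    # buffer incoming bytes and re-slice them into fixed-size frames
    chunk = b""
    for data in stream:
        chunk += data
        while len(chunk) >= chunk_size:
            yield chunk[:chunk_size]
            chunk = chunk[chunk_size:]
    if chunk:  # trailing partial frame; the caller pads it
        yield chunk

# PcmConfig implies 16000 // 100 = 160 samples * 1 channel * 2 bytes = 320
# bytes per 10 ms frame, which is exactly what get_pcm_frame_size() returns.
frames = list(rechunk(iter([b"\x00" * 500, b"\x00" * 300]), 320))
assert [len(f) for f in frames] == [320, 320, 160]  # last frame is partial

The extension can send the final partial frame as-is because new_buf() returns a zeroed bytearray, so the unfilled tail is zero padding.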
+#
+#
+
+from rte_runtime_python import (
+    Addon,
+    Extension,
+    register_addon_as_extension,
+    Rte,
+    Cmd,
+    Data,
+    StatusCode,
+    CmdResult,
+    MetadataInfo,
+)
+from .log import logger
+
+
+CMD_NAME_FLUSH = "flush"
+
+TEXT_DATA_TEXT_FIELD = "text"
+TEXT_DATA_FINAL_FIELD = "is_final"
+
+
+class InterruptDetectorExtension(Extension):
+    def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None:
+        logger.info("on_init")
+        rte.on_init_done(manifest, property)
+
+    def on_start(self, rte: Rte) -> None:
+        logger.info("on_start")
+        rte.on_start_done()
+
+    def on_stop(self, rte: Rte) -> None:
+        logger.info("on_stop")
+        rte.on_stop_done()
+
+    def on_deinit(self, rte: Rte) -> None:
+        logger.info("on_deinit")
+        rte.on_deinit_done()
+
+    def on_cmd(self, rte: Rte, cmd: Cmd) -> None:
+        logger.info("on_cmd")
+        cmd_json = cmd.to_json()
+        logger.info("on_cmd json: " + cmd_json)
+
+        cmd_result = CmdResult.create(StatusCode.OK)
+        cmd_result.set_property_string("detail", "success")
+        rte.return_result(cmd_result, cmd)
+
+    def on_data(self, rte: Rte, data: Data) -> None:
+        """
+        on_data receives data from rte graph.
+        current supported data:
+          - name: text_data
+            example:
+            {name: text_data, properties: {text: "hello", is_final: false}
+        """
+        try:
+            text = data.get_property_string(TEXT_DATA_TEXT_FIELD)
+        except Exception as e:
+            logger.warning(
+                f"on_data get_property_string {TEXT_DATA_TEXT_FIELD} error: {e}"
+            )
+            return
+
+        try:
+            final = data.get_property_bool(TEXT_DATA_FINAL_FIELD)
+        except Exception as e:
+            logger.warning(
+                f"on_data get_property_bool {TEXT_DATA_FINAL_FIELD} error: {e}"
+            )
+            return
+
+        logger.debug(
+            f"on_data {TEXT_DATA_TEXT_FIELD}: {text} {TEXT_DATA_FINAL_FIELD}: {final}"
+        )
+
+        if final or len(text) >= 2:
+            flush_cmd = rte.new_cmd(CMD_NAME_FLUSH)
+            rte.send_cmd(flush_cmd, None)
+
+            logger.info(f"sent cmd: {CMD_NAME_FLUSH}")
+
+
+@register_addon_as_extension("interrupt_detector_python")
+class InterruptDetectorExtensionAddon(Addon):
+    def on_init(self, rte: Rte, manifest, property) -> None:
+        logger.info("on_init")
+        rte.on_init_done(manifest, property)
+        return
+
+    def on_create_instance(self, rte: Rte, addon_name: str, context) -> None:
+        logger.info("on_create_instance")
+        rte.on_create_instance_done(InterruptDetectorExtension(addon_name), context)
+
+    def on_deinit(self, rte: Rte) -> None:
+        logger.info("on_deinit")
+        rte.on_deinit_done()
+        return
diff --git a/agents/addon/extension/interrupt_detector_python/log.py b/agents/addon/extension/interrupt_detector_python/log.py
new file mode 100644
index 000000000..303d06e12
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/log.py
@@ -0,0 +1,13 @@
+import logging
+
+logger = logging.getLogger("interrupt_detector_python")
+logger.setLevel(logging.INFO)
+
+formatter = logging.Formatter(
+    "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s"
+)
+
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(formatter)
+
+logger.addHandler(console_handler)
diff --git a/agents/addon/extension/interrupt_detector_python/manifest.json b/agents/addon/extension/interrupt_detector_python/manifest.json
new file mode 100644
index 000000000..9b1b2461c
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/manifest.json
@@ -0,0 +1,33 @@
+{
+  "type": "extension",
+  "name": "interrupt_detector_python",
+  "version": "0.1.0",
+  "language": "python",
+  "dependencies": [
+    {
+      "type": "system",
+      "name": "rte_runtime_python",
+      "version": "0.2.0"
+    }
+  ],
+  "api": {
+    "data_in": [
+      {
+        "name": "text_data",
+        "property": {
+          "text": {
+            "type": "string"
+          },
+          "is_final": {
+            "type": "bool"
+          }
+        }
+      }
+    ],
+    "cmd_out": [
+      {
+        "name": "flush"
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/agents/addon/extension/interrupt_detector_python/property.json b/agents/addon/extension/interrupt_detector_python/property.json
new file mode 100644
index 000000000..9e26dfeeb
--- /dev/null
+++ b/agents/addon/extension/interrupt_detector_python/property.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/agents/addon/extension/openai_chatgpt_python/main.py b/agents/addon/extension/openai_chatgpt_python/main.py
index 118464040..ebf0271c4 100644
--- a/agents/addon/extension/openai_chatgpt_python/main.py
+++ b/agents/addon/extension/openai_chatgpt_python/main.py
@@ -19,10 +19,9 @@
 from rte_runtime_python.image_frame import ImageFrame
 from PIL import Image, ImageFilter
 
+
 class OpenAIChatGPTExtension(Extension):
-    def on_init(
-        self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo
-    ) -> None:
+    def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None:
         print("OpenAIChatGPTExtension on_init")
         rte.on_init_done(manifest, property)
 
@@ -50,6 +49,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None:
     def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None:
         print("OpenAIChatGPTExtension on_cmd")
 
+
 @register_addon_as_extension("openai_chatgpt_python")
 class OpenAIChatGPTExtensionAddon(Addon):
     def on_init(self, rte: Rte, manifest, property) -> None:
@@ -57,9 +57,9 @@ def on_init(self, rte: Rte, manifest, property) -> None:
         rte.on_init_done(manifest, property)
         return
 
-    def on_create_instance(self, rte: Rte, addon_name: str) -> Extension:
-        print("OpenAIChatGPTExtensionAddon on_create_instance")
-        return OpenAIChatGPTExtension(addon_name)
+    def on_create_instance(self, rte: Rte, addon_name: str, context) -> None:
+        print("on_create_instance")
+        rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context)
 
     def on_deinit(self, rte: Rte) -> None:
         print("OpenAIChatGPTExtensionAddon on_deinit")
From 4cd376c5b5da6bca2a54e3eafe7bb7a6425e82f1 Mon Sep 17 00:00:00 2001
From: Jay Zhang
Date: Fri, 12 Jul 2024 08:47:27 +0000
Subject: [PATCH 09/72] fix: agora_rtc

---
 agents/manifest.json.example | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agents/manifest.json.example b/agents/manifest.json.example
index 964f0dd04..69538cb0c 100644
--- a/agents/manifest.json.example
+++ b/agents/manifest.json.example
@@ -12,7 +12,7 @@
     {
       "type": "extension",
       "name": "agora_rtc",
-      "version": "0.2.0-alpha"
+      "version": "0.2.1"
     },
     {
       "type": "system",
From c0597a00d8ef3c35d57c76f8baf774ad076bf352 Mon Sep 17 00:00:00 2001
From: sunshinexcode <24xinhui@163.com>
Date: Sat, 13 Jul 2024 08:41:32 +0800
Subject: [PATCH 10/72] Feature/python experimental (#93)

---
 .../chat_transcriber_extension.py              |  4 +++-
 .../elevenlabs_tts_python/elevenlabs_tts.py    |  4 ++--
 .../elevenlabs_tts_extension.py                |  6 ++++--
 .../interrupt_detector_extension.py            |  4 +++-
 .../extension/openai_chatgpt_python/main.py    | 20 +++++++++++++++++++
 5 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py
index 59f92c2c2..aa2fcf0c6 100644
--- a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py
+++
b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py @@ -66,6 +66,8 @@ def on_data(self, rte: Rte, data: Data) -> None: example: {"name": "text_data", "properties": {"text": "hello", "is_final": true, "stream_id": 123, "end_of_segment": true}} """ + logger.info(f"on_data") + try: text = data.get_property_string(TEXT_DATA_TEXT_FIELD) except Exception as e: @@ -135,7 +137,7 @@ def on_data(self, rte: Rte, data: Data) -> None: try: # convert the origin text data to the protobuf data and send it to the graph. - rte_data = data.create("data") + rte_data = Data.create("data") rte_data.set_property_string("data", pb_serialized_text) except Exception as e: logger.warning(f"on_data new_data error: {e}") diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py index 8f42e6d5f..6fe1b72c5 100644 --- a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts.py @@ -40,13 +40,13 @@ def default_elevenlabs_tts_config() -> ElevenlabsTTSConfig: class ElevenlabsTTS: - def __init__(self, config) -> None: + def __init__(self, config: ElevenlabsTTSConfig) -> None: self.config = config self.client = ElevenLabs( api_key=config.api_key, timeout=config.request_timeout_seconds ) - def text_to_speech_stream(self, text) -> Iterator[bytes]: + def text_to_speech_stream(self, text: str) -> Iterator[bytes]: audio_stream = self.client.generate( text=text, model=self.config.model_id, diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py index d2e94b294..1c2799624 100644 --- a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py @@ -175,7 +175,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: self.outdate_ts = int(time.time() * 1000000) # send out - out_cmd = cmd.create(CMD_OUT_FLUSH) + out_cmd = Cmd.create(CMD_OUT_FLUSH) rte.send_cmd(out_cmd) cmd_result = CmdResult.create(StatusCode.OK) @@ -213,6 +213,8 @@ def process_text_queue(self, rte: Rte): while True: msg = self.text_queue.get() + logger.debug(f"process_text_queue, text: [{msg.text}]") + if msg.received_ts < self.outdate_ts: logger.info( f"textChan interrupt and flushing for input text: [{msg.text}], received_ts: {msg.received_ts}, outdate_ts: {self.outdate_ts}" @@ -227,7 +229,7 @@ def process_text_queue(self, rte: Rte): read_bytes = 0 sent_frames = 0 - audio_stream = self.elevenlabs_tts.text_to_speech_stream(msg) + audio_stream = self.elevenlabs_tts.text_to_speech_stream(msg.text) for chunk in self.pcm.read_pcm_stream(audio_stream, self.pcm_frame_size): if msg.received_ts < self.outdate_ts: diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py index ac55f0fda..cbb751f36 100644 --- a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py @@ -60,6 +60,8 @@ def on_data(self, rte: Rte, data: Data) -> None: example: {name: text_data, properties: {text: "hello", is_final: false} """ + logger.info(f"on_data") + try: text = data.get_property_string(TEXT_DATA_TEXT_FIELD) except Exception as e: @@ -81,7 +83,7 @@ def on_data(self, rte: Rte, data: Data) -> None: ) if 
final or len(text) >= 2: - flush_cmd = rte.new_cmd(CMD_NAME_FLUSH) + flush_cmd = Cmd.create(CMD_NAME_FLUSH) rte.send_cmd(flush_cmd, None) logger.info(f"sent cmd: {CMD_NAME_FLUSH}") diff --git a/agents/addon/extension/openai_chatgpt_python/main.py b/agents/addon/extension/openai_chatgpt_python/main.py index ebf0271c4..488bebfad 100644 --- a/agents/addon/extension/openai_chatgpt_python/main.py +++ b/agents/addon/extension/openai_chatgpt_python/main.py @@ -11,6 +11,7 @@ register_addon_as_extension, Rte, Cmd, + Data, StatusCode, CmdResult, MetadataInfo, @@ -49,6 +50,25 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: print("OpenAIChatGPTExtension on_cmd") + def on_data(self, rte: Rte, data: Data) -> None: + """ + on_data receives data from rte graph. + current supported data: + - name: text_data + example: + {name: text_data, properties: {text: "hello"} + """ + print(f"OpenAIChatGPTExtension on_data") + + try: + rte_data = Data.create("text_data") + rte_data.set_property_string("text", "hello, world, who are you!") + except Exception as e: + print(f"on_data new_data error, ", e) + return + + rte.send_data(rte_data) + @register_addon_as_extension("openai_chatgpt_python") class OpenAIChatGPTExtensionAddon(Addon): From 48811c656f72c56b20271d4b6324d2a6d9f72853 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sat, 13 Jul 2024 16:39:05 +0800 Subject: [PATCH 11/72] feat: initial migration for openai --- .../chat_transcriber_python/requirements.txt | 1 + .../openai_chatgpt_python/__init__.py | 2 +- .../extension/openai_chatgpt_python/main.py | 87 -------- .../openai_chatgpt_python/manifest.json | 20 ++ .../openai_chatgpt_python/openai_chatgpt.py | 87 ++++++++ .../openai_chatgpt_extension.py | 207 ++++++++++++++++++ .../openai_chatgpt_python/requirements.txt | 2 + agents/bin/start | 8 +- 8 files changed, 322 insertions(+), 92 deletions(-) delete mode 100644 agents/addon/extension/openai_chatgpt_python/main.py create mode 100644 agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py create mode 100644 agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py diff --git a/agents/addon/extension/chat_transcriber_python/requirements.txt b/agents/addon/extension/chat_transcriber_python/requirements.txt index a4a4a4d8d..bcc3fd657 100644 --- a/agents/addon/extension/chat_transcriber_python/requirements.txt +++ b/agents/addon/extension/chat_transcriber_python/requirements.txt @@ -1 +1,2 @@ protobuf==5.27.2 +google==3.0.0 \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt_python/__init__.py b/agents/addon/extension/openai_chatgpt_python/__init__.py index 98152ef44..1dc25495b 100644 --- a/agents/addon/extension/openai_chatgpt_python/__init__.py +++ b/agents/addon/extension/openai_chatgpt_python/__init__.py @@ -1,3 +1,3 @@ -from . import main +from . import openai_chatgpt_extension print("openai_chatgpt_python extension loaded") diff --git a/agents/addon/extension/openai_chatgpt_python/main.py b/agents/addon/extension/openai_chatgpt_python/main.py deleted file mode 100644 index 488bebfad..000000000 --- a/agents/addon/extension/openai_chatgpt_python/main.py +++ /dev/null @@ -1,87 +0,0 @@ -# -# -# Agora Real Time Engagement -# Created by Wei Hu in 2024-05. -# Copyright (c) 2024 Agora IO. All rights reserved. 
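A recurring fix in PATCH 10 above is the switch from instance-level factories (cmd.create(...), data.create(...), rte.new_cmd(...)) to the class-level Cmd.create and Data.create. A minimal sketch of the corrected calling pattern, using only calls that appear in this series (rte is the handle passed into an extension callback; the function names are illustrative):

from rte_runtime_python import Cmd, Data, Rte

def forward_flush(rte: Rte) -> None:
    # class-level factory, not the old instance-level cmd.create(...)
    flush_cmd = Cmd.create("flush")
    rte.send_cmd(flush_cmd, None)

def send_text(rte: Rte, text: str, end_of_segment: bool) -> None:
    out = Data.create("text_data")
    out.set_property_string("text", text)
    out.set_property_bool("end_of_segment", end_of_segment)
    rte.send_data(out)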
-# -# -from rte_runtime_python import ( - Addon, - Extension, - register_addon_as_extension, - Rte, - Cmd, - Data, - StatusCode, - CmdResult, - MetadataInfo, - RTE_PIXEL_FMT, -) -from rte_runtime_python.image_frame import ImageFrame -from PIL import Image, ImageFilter - - -class OpenAIChatGPTExtension(Extension): - def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: - print("OpenAIChatGPTExtension on_init") - rte.on_init_done(manifest, property) - - def on_start(self, rte: Rte) -> None: - print("OpenAIChatGPTExtension on_start") - rte.on_start_done() - - def on_stop(self, rte: Rte) -> None: - print("OpenAIChatGPTExtension on_stop") - rte.on_stop_done() - - def on_deinit(self, rte: Rte) -> None: - print("OpenAIChatGPTExtension on_deinit") - rte.on_deinit_done() - - def on_cmd(self, rte: Rte, cmd: Cmd) -> None: - print("OpenAIChatGPTExtension on_cmd") - cmd_json = cmd.to_json() - print("OpenAIChatGPTExtension on_cmd json: " + cmd_json) - - cmd_result = CmdResult.create(StatusCode.OK) - cmd_result.set_property_string("detail", "success") - rte.return_result(cmd_result, cmd) - - def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: - print("OpenAIChatGPTExtension on_cmd") - - def on_data(self, rte: Rte, data: Data) -> None: - """ - on_data receives data from rte graph. - current supported data: - - name: text_data - example: - {name: text_data, properties: {text: "hello"} - """ - print(f"OpenAIChatGPTExtension on_data") - - try: - rte_data = Data.create("text_data") - rte_data.set_property_string("text", "hello, world, who are you!") - except Exception as e: - print(f"on_data new_data error, ", e) - return - - rte.send_data(rte_data) - - -@register_addon_as_extension("openai_chatgpt_python") -class OpenAIChatGPTExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: - print("OpenAIChatGPTExtensionAddon on_init") - rte.on_init_done(manifest, property) - return - - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: - print("on_create_instance") - rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context) - - def on_deinit(self, rte: Rte) -> None: - print("OpenAIChatGPTExtensionAddon on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/openai_chatgpt_python/manifest.json b/agents/addon/extension/openai_chatgpt_python/manifest.json index 1deea357e..671e6a29d 100644 --- a/agents/addon/extension/openai_chatgpt_python/manifest.json +++ b/agents/addon/extension/openai_chatgpt_python/manifest.json @@ -50,6 +50,26 @@ } } } + ], + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } ] } } \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py new file mode 100644 index 000000000..f8e6658cd --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py @@ -0,0 +1,87 @@ +import random +import requests +import openai +from typing import List, Dict, Any, Optional + + +class OpenAIChatGPTConfig: + def __init__(self, + base_url: str, + api_key: str, + model: str, + prompt: str, + frequency_penalty: float, + presence_penalty: float, + top_p: float, + temperature: float, + max_tokens: int, + seed: Optional[int] = None, + proxy_url: Optional[str] = None): + self.base_url = base_url + self.api_key 
= api_key + self.model = model + self.prompt = prompt + self.frequency_penalty = frequency_penalty + self.presence_penalty = presence_penalty + self.top_p = top_p + self.temperature = temperature + self.max_tokens = max_tokens + self.seed = seed if seed is not None else random.randint(0, 10000) + self.proxy_url = proxy_url + + @classmethod + def default_config(cls): + return cls( + base_url="https://api.openai.com/v1", + api_key="", + model="gpt-4", # Adjust this to match the equivalent of `openai.GPT4o` in the Python library + prompt="You are a voice assistant who talks in a conversational way and can chat with me like my friends. I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. When giving me a list of options, express the options in a narrative way instead of bullet points.", + frequency_penalty=0.9, + presence_penalty=0.9, + top_p=1.0, + temperature=0.1, + max_tokens=512, + seed=random.randint(0, 10000), + proxy_url="" + ) + + +class OpenAIChatGPT: + def __init__(self, config: OpenAIChatGPTConfig): + self.config = config + openai.api_key = config.api_key + openai.api_base = config.base_url + if config.proxy_url: + proxies = { + "http": config.proxy_url, + "https": config.proxy_url, + } + self.session = requests.Session() + self.session.proxies.update(proxies) + else: + self.session = requests.Session() + + def get_chat_completions_stream(self, messages): + req = { + "model": self.config["Model"], + "messages": [ + { + "role": "system", + "content": self.config["Prompt"], + }, + *messages, + ], + "temperature": self.config["Temperature"], + "top_p": self.config["TopP"], + "presence_penalty": self.config["PresencePenalty"], + "frequency_penalty": self.config["FrequencyPenalty"], + "max_tokens": self.config["MaxTokens"], + "seed": self.config["Seed"], + "stream": True, + } + + try: + response = self.client.Completion.create(**req) + return response + except Exception as e: + raise Exception(f"CreateChatCompletionStream failed, err: {e}") \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py new file mode 100644 index 000000000..c9eded8a5 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -0,0 +1,207 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. 
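openai_chatgpt.py above wraps the OpenAI SDK behind a config object plus a single streaming call. As introduced here it still reads the config dict-style (self.config["Model"]) and calls self.client.Completion.create; PATCH 12 below corrects both to attribute access and self.client.chat.completions.create. A hedged usage sketch assuming that corrected version (the import path, key, and model id are placeholders):

from openai_chatgpt import OpenAIChatGPT, OpenAIChatGPTConfig

config = OpenAIChatGPTConfig.default_config()
config.api_key = "sk-..."  # placeholder; supply a real key via property.json
config.model = "gpt-4o"    # any chat-capable model id

gpt = OpenAIChatGPT(config)
# the system prompt is injected inside get_chat_completions_stream,
# so callers pass only the conversation turns
for chunk in gpt.get_chat_completions_stream([{"role": "user", "content": "hello"}]):
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)

This mirrors how the extension below consumes the stream, reading choices[0].delta.content from each chunk.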
+# +# +from .openai_chatgpt import OpenAIChatGPT, OpenAIChatGPTConfig +from rte_runtime_python import ( + Addon, + Extension, + register_addon_as_extension, + Rte, + Cmd, + Data, + StatusCode, + CmdResult, + MetadataInfo, + RTE_PIXEL_FMT, +) +from rte_runtime_python.image_frame import ImageFrame +from PIL import Image, ImageFilter + + +CMD_IN_FLUSH = "flush" +CMD_OUT_FLUSH = "flush" +DATA_IN_TEXT_DATA_PROPERTY_TEXT = "text" +DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL = "is_final" +DATA_OUT_TEXT_DATA_PROPERTY_TEXT = "text" +DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT = "end_of_segment" + +PROPERTY_BASE_URL = "base_url" # Optional +PROPERTY_API_KEY = "api_key" # Required +PROPERTY_MODEL = "model" # Optional +PROPERTY_PROMPT = "prompt" # Optional +PROPERTY_FREQUENCY_PENALTY = "frequency_penalty" # Optional +PROPERTY_PRESENCE_PENALTY = "presence_penalty" # Optional +PROPERTY_TEMPERATURE = "temperature" # Optional +PROPERTY_TOP_P = "top_p" # Optional +PROPERTY_MAX_TOKENS = "max_tokens" # Optional +PROPERTY_GREETING = "greeting" # Optional +PROPERTY_PROXY_URL = "proxy_url" # Optional +PROPERTY_MAX_MEMORY_LENGTH = "max_memory_length" # Optional + +memory = [] +max_memory_length = 10 + +class OpenAIChatGPTExtension(Extension): + def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + print("OpenAIChatGPTExtension on_init") + rte.on_init_done(manifest, property) + + def on_start(self, rte: Rte) -> None: + print("OpenAIChatGPTExtension on_start") + # Prepare configuration + openai_chatgpt_config = OpenAIChatGPTConfig.default_config() + + try: + base_url = rte.get_property_string(PROPERTY_BASE_URL) + if base_url: + openai_chatgpt_config.BaseUrl = base_url + except Exception as err: + print(f"GetProperty required {PROPERTY_BASE_URL} failed, err: {err}") + + try: + api_key = rte.get_property_string(PROPERTY_API_KEY) + openai_chatgpt_config.ApiKey = api_key + except Exception as err: + print(f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}") + + try: + model = rte.get_property_string(PROPERTY_MODEL) + if model: + openai_chatgpt_config.Model = model + except Exception as err: + print(f"GetProperty optional {PROPERTY_MODEL} error: {err}") + + try: + prompt = rte.get_property_string(PROPERTY_PROMPT) + if prompt: + openai_chatgpt_config.Prompt = prompt + except Exception as err: + print(f"GetProperty optional {PROPERTY_PROMPT} error: {err}") + + try: + frequency_penalty = rte.get_property_float(PROPERTY_FREQUENCY_PENALTY) + openai_chatgpt_config.FrequencyPenalty = float(frequency_penalty) + except Exception as err: + print(f"GetProperty optional {PROPERTY_FREQUENCY_PENALTY} failed, err: {err}") + + try: + presence_penalty = rte.get_property_float(PROPERTY_PRESENCE_PENALTY) + openai_chatgpt_config.PresencePenalty = float(presence_penalty) + except Exception as err: + print(f"GetProperty optional {PROPERTY_PRESENCE_PENALTY} failed, err: {err}") + + try: + temperature = rte.get_property_float(PROPERTY_TEMPERATURE) + openai_chatgpt_config.Temperature = float(temperature) + except Exception as err: + print(f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}") + + try: + top_p = rte.get_property_float(PROPERTY_TOP_P) + openai_chatgpt_config.TopP = float(top_p) + except Exception as err: + print(f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}") + + try: + max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS) + if max_tokens > 0: + openai_chatgpt_config.MaxTokens = int(max_tokens) + except Exception as err: + print(f"GetProperty optional 
{PROPERTY_MAX_TOKENS} failed, err: {err}") + + try: + proxy_url = rte.get_property_string(PROPERTY_PROXY_URL) + openai_chatgpt_config.ProxyUrl = proxy_url + except Exception as err: + print(f"GetProperty optional {PROPERTY_PROXY_URL} failed, err: {err}") + + try: + greeting = rte.get_property_string(PROPERTY_GREETING) + except Exception as err: + print(f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}") + + try: + prop_max_memory_length = rte.get_property_int(PROPERTY_MAX_MEMORY_LENGTH) + if prop_max_memory_length > 0: + max_memory_length = int(prop_max_memory_length) + except Exception as err: + print(f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}") + + # Create openaiChatGPT instance + try: + openai_chatgpt = OpenAIChatGPT(openai_chatgpt_config) + print(f"newOpenaiChatGPT succeed with max_tokens: {openai_chatgpt_config.MaxTokens}, model: {openai_chatgpt_config.Model}") + except Exception as err: + print(f"newOpenaiChatGPT failed, err: {err}") + + # Send greeting if available + if greeting: + try: + output_data = Data.create("text_data") + output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) + output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + rte.send_data(output_data) + print(f"greeting [{greeting}] sent") + except Exception as err: + print(f"greeting [{greeting}] send failed, err: {err}") + rte.on_start_done() + + def on_stop(self, rte: Rte) -> None: + print("OpenAIChatGPTExtension on_stop") + rte.on_stop_done() + + def on_deinit(self, rte: Rte) -> None: + print("OpenAIChatGPTExtension on_deinit") + rte.on_deinit_done() + + def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + print("OpenAIChatGPTExtension on_cmd") + cmd_json = cmd.to_json() + print("OpenAIChatGPTExtension on_cmd json: " + cmd_json) + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + rte.return_result(cmd_result, cmd) + + def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: + print("OpenAIChatGPTExtension on_cmd") + + def on_data(self, rte: Rte, data: Data) -> None: + """ + on_data receives data from rte graph. 
+ current supported data: + - name: text_data + example: + {name: text_data, properties: {text: "hello"} + """ + print(f"OpenAIChatGPTExtension on_data") + + try: + rte_data = Data.create("text_data") + rte_data.set_property_string("text", "hello, world, who are you!") + except Exception as e: + print(f"on_data new_data error, ", e) + return + + rte.send_data(rte_data) + + +@register_addon_as_extension("openai_chatgpt_python") +class OpenAIChatGPTExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + print("OpenAIChatGPTExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + print("on_create_instance") + rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + print("OpenAIChatGPTExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/openai_chatgpt_python/requirements.txt b/agents/addon/extension/openai_chatgpt_python/requirements.txt index 4efc8c760..d4da23b7a 100644 --- a/agents/addon/extension/openai_chatgpt_python/requirements.txt +++ b/agents/addon/extension/openai_chatgpt_python/requirements.txt @@ -1 +1,3 @@ pillow==10.4.0 +openai==1.35.13 +requests==2.32.3 \ No newline at end of file diff --git a/agents/bin/start b/agents/bin/start index dbf68737f..27d563949 100755 --- a/agents/bin/start +++ b/agents/bin/start @@ -4,10 +4,10 @@ set -e cd "$(dirname "${BASH_SOURCE[0]}")/.." -python3 -m venv . -cd bin -source activate -cd .. +# python3 -m venv . +# cd bin +# source activate +# cd .. if [[ -f "requirements.txt" ]]; then pip install -r requirements.txt From af0e1fae70e115699a02d8b5ae4a5a11a2dfa70f Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sat, 13 Jul 2024 18:13:40 +0800 Subject: [PATCH 12/72] feat: openai working initially --- .../openai_chatgpt_python/openai_chatgpt.py | 33 ++-- .../openai_chatgpt_extension.py | 169 +++++++++++++++--- 2 files changed, 167 insertions(+), 35 deletions(-) diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py index f8e6658cd..2b885b6e9 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py @@ -1,6 +1,6 @@ import random import requests -import openai +from openai import OpenAI from typing import List, Dict, Any, Optional @@ -47,41 +47,44 @@ def default_config(cls): class OpenAIChatGPT: + client = None def __init__(self, config: OpenAIChatGPTConfig): self.config = config - openai.api_key = config.api_key - openai.api_base = config.base_url + print(f"OpenAIChatGPT initialized with config: {config.api_key}") + self.client = OpenAI( + api_key=config.api_key, + base_url=config.base_url + ) + self.session = requests.Session() if config.proxy_url: proxies = { "http": config.proxy_url, "https": config.proxy_url, } - self.session = requests.Session() self.session.proxies.update(proxies) - else: - self.session = requests.Session() + self.client.session = self.session def get_chat_completions_stream(self, messages): req = { - "model": self.config["Model"], + "model": self.config.model, "messages": [ { "role": "system", - "content": self.config["Prompt"], + "content": self.config.prompt, }, *messages, ], - "temperature": self.config["Temperature"], - "top_p": self.config["TopP"], - "presence_penalty": self.config["PresencePenalty"], - "frequency_penalty": 
self.config["FrequencyPenalty"], - "max_tokens": self.config["MaxTokens"], - "seed": self.config["Seed"], + "temperature": self.config.temperature, + "top_p": self.config.top_p, + "presence_penalty": self.config.presence_penalty, + "frequency_penalty": self.config.frequency_penalty, + "max_tokens": self.config.max_tokens, + "seed": self.config.seed, "stream": True, } try: - response = self.client.Completion.create(**req) + response = self.client.chat.completions.create(**req) return response except Exception as e: raise Exception(f"CreateChatCompletionStream failed, err: {e}") \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index c9eded8a5..2747eea47 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -6,6 +6,8 @@ # # from .openai_chatgpt import OpenAIChatGPT, OpenAIChatGPTConfig +from datetime import datetime +from threading import Thread from rte_runtime_python import ( Addon, Extension, @@ -42,10 +44,39 @@ PROPERTY_PROXY_URL = "proxy_url" # Optional PROPERTY_MAX_MEMORY_LENGTH = "max_memory_length" # Optional -memory = [] -max_memory_length = 10 +def get_current_time(): + # Get the current time + start_time = datetime.now() + # Get the number of microseconds since the Unix epoch + unix_microseconds = int(start_time.timestamp() * 1_000_000) + return unix_microseconds + +def is_punctuation(char): + if char in [',', ',', '.', '。', '?', '?', '!', '!']: + return True + return False + +def parse_sentence(sentence, content): + remain = "" + found_punc = False + + for char in content: + if not found_punc: + sentence += char + else: + remain += char + + if not found_punc and is_punctuation(char): + found_punc = True + + return sentence, remain, found_punc class OpenAIChatGPTExtension(Extension): + memory = [] + max_memory_length = 10 + outdate_ts = 0 + openai_chatgpt = None + def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: print("OpenAIChatGPTExtension on_init") rte.on_init_done(manifest, property) @@ -58,64 +89,65 @@ def on_start(self, rte: Rte) -> None: try: base_url = rte.get_property_string(PROPERTY_BASE_URL) if base_url: - openai_chatgpt_config.BaseUrl = base_url + openai_chatgpt_config.base_url = base_url except Exception as err: print(f"GetProperty required {PROPERTY_BASE_URL} failed, err: {err}") try: api_key = rte.get_property_string(PROPERTY_API_KEY) - openai_chatgpt_config.ApiKey = api_key + openai_chatgpt_config.api_key = api_key except Exception as err: print(f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}") + return try: model = rte.get_property_string(PROPERTY_MODEL) if model: - openai_chatgpt_config.Model = model + openai_chatgpt_config.model = model except Exception as err: print(f"GetProperty optional {PROPERTY_MODEL} error: {err}") try: prompt = rte.get_property_string(PROPERTY_PROMPT) if prompt: - openai_chatgpt_config.Prompt = prompt + openai_chatgpt_config.prompt = prompt except Exception as err: print(f"GetProperty optional {PROPERTY_PROMPT} error: {err}") try: frequency_penalty = rte.get_property_float(PROPERTY_FREQUENCY_PENALTY) - openai_chatgpt_config.FrequencyPenalty = float(frequency_penalty) + openai_chatgpt_config.frequency_penalty = float(frequency_penalty) except Exception as err: print(f"GetProperty optional {PROPERTY_FREQUENCY_PENALTY} failed, err: {err}") try: 
presence_penalty = rte.get_property_float(PROPERTY_PRESENCE_PENALTY) - openai_chatgpt_config.PresencePenalty = float(presence_penalty) + openai_chatgpt_config.presence_penalty = float(presence_penalty) except Exception as err: print(f"GetProperty optional {PROPERTY_PRESENCE_PENALTY} failed, err: {err}") try: temperature = rte.get_property_float(PROPERTY_TEMPERATURE) - openai_chatgpt_config.Temperature = float(temperature) + openai_chatgpt_config.temperature = float(temperature) except Exception as err: print(f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}") try: top_p = rte.get_property_float(PROPERTY_TOP_P) - openai_chatgpt_config.TopP = float(top_p) + openai_chatgpt_config.top_p = float(top_p) except Exception as err: print(f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}") try: max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS) if max_tokens > 0: - openai_chatgpt_config.MaxTokens = int(max_tokens) + openai_chatgpt_config.max_tokens = int(max_tokens) except Exception as err: print(f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}") try: proxy_url = rte.get_property_string(PROPERTY_PROXY_URL) - openai_chatgpt_config.ProxyUrl = proxy_url + openai_chatgpt_config.proxy_url = proxy_url except Exception as err: print(f"GetProperty optional {PROPERTY_PROXY_URL} failed, err: {err}") @@ -127,14 +159,14 @@ def on_start(self, rte: Rte) -> None: try: prop_max_memory_length = rte.get_property_int(PROPERTY_MAX_MEMORY_LENGTH) if prop_max_memory_length > 0: - max_memory_length = int(prop_max_memory_length) + self.max_memory_length = int(prop_max_memory_length) except Exception as err: print(f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}") # Create openaiChatGPT instance try: - openai_chatgpt = OpenAIChatGPT(openai_chatgpt_config) - print(f"newOpenaiChatGPT succeed with max_tokens: {openai_chatgpt_config.MaxTokens}, model: {openai_chatgpt_config.Model}") + self.openai_chatgpt = OpenAIChatGPT(openai_chatgpt_config) + print(f"newOpenaiChatGPT succeed with max_tokens: {openai_chatgpt_config.max_tokens}, model: {openai_chatgpt_config.model}") except Exception as err: print(f"newOpenaiChatGPT failed, err: {err}") @@ -180,14 +212,111 @@ def on_data(self, rte: Rte, data: Data) -> None: """ print(f"OpenAIChatGPTExtension on_data") + # Assume 'data' is an object from which we can get properties try: - rte_data = Data.create("text_data") - rte_data.set_property_string("text", "hello, world, who are you!") - except Exception as e: - print(f"on_data new_data error, ", e) + is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) + if not is_final: + print("ignore non-final input") + return + except Exception as err: + print(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") return - rte.send_data(rte_data) + # Get input text + try: + input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) + if not input_text: + print("ignore empty text") + return + print(f"OnData input text: [{input_text}]") + except Exception as err: + print(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") + return + + # Prepare memory + if len(self.memory) > self.max_memory_length: + self.memory.pop(0) + self.memory.append({"role": "user", "content": input_text}) + + + def chat_completions_stream_worker(start_time, input_text, memory): + try: + print(f"GetChatCompletionsStream for input text: [{input_text}] memory: {memory}") + + # Get result from AI + resp = 
self.openai_chatgpt.get_chat_completions_stream(memory) + if resp is None: + print(f"GetChatCompletionsStream for input text: [{input_text}] failed") + return + + sentence = "" + full_content = "" + first_sentence_sent = False + + for chat_completions in resp: + if start_time < self.outdate_ts: + print(f"GetChatCompletionsStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}") + break + + if len(chat_completions.choices) > 0 and chat_completions.choices[0].delta.content is not None: + content = chat_completions.choices[0].delta.content + else: + content = "" + + full_content += content + + while True: + sentence, content, sentence_is_final = parse_sentence(sentence, content) + if len(sentence) == 0 or not sentence_is_final: + print(f"sentence {sentence} is empty or not final") + break + print(f"GetChatCompletionsStream recv for input text: [{input_text}] got sentence: [{sentence}]") + + # send sentence + try: + output_data = Data.create("text_data") + output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) + output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False) + rte.send_data(output_data) + print(f"GetChatCompletionsStream recv for input text: [{input_text}] sent sentence [{sentence}]") + except Exception as err: + print(f"GetChatCompletionsStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}") + break + + sentence = "" + if not first_sentence_sent: + first_sentence_sent = True + print(f"GetChatCompletionsStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms") + + # remember response as assistant content in memory + memory.append({"role": "assistant", "content": full_content}) + + # send end of segment + try: + output_data = Data.create("text_data") + output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) + output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + rte.send_data(output_data) + print(f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent") + except Exception as err: + print(f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}") + + except Exception as e: + print(f"GetChatCompletionsStream for input text: [{input_text}] failed, err: {e}") + + # Start thread to request and read responses from OpenAI + start_time = get_current_time() + thread = Thread(target=chat_completions_stream_worker, args=(start_time, input_text, self.memory)) + thread.start() + thread.join() + # try: + # rte_data = Data.create("text_data") + # rte_data.set_property_string("text", "hello, world, who are you!") + # except Exception as e: + # print(f"on_data new_data error, ", e) + # return + + # rte.send_data(rte_data) @register_addon_as_extension("openai_chatgpt_python") From c011a359092d2d9c60fb93ebc95cfc25d573d141 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Sat, 13 Jul 2024 14:14:53 +0000 Subject: [PATCH 13/72] fix: tts task not execute --- agents/addon/extension/azure_tts/src/tts.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/addon/extension/azure_tts/src/tts.cc b/agents/addon/extension/azure_tts/src/tts.cc index ea5a2919d..4bcd1ce7e 100644 --- a/agents/addon/extension/azure_tts/src/tts.cc +++ b/agents/addon/extension/azure_tts/src/tts.cc @@ -36,7 +36,7 @@ bool AzureTTS::Start() { { 
std::unique_lock lk(tasks_mutex_); - tasks_cv_.wait(lk); + tasks_cv_.wait(lk, [this]() { return !tasks_.empty(); }); if (tasks_.empty()) { continue; } From b8a604119b1bd007242e972b17584e42792ca195 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sat, 13 Jul 2024 22:29:14 +0800 Subject: [PATCH 14/72] fix: prevent thread blocking in on_data --- .../extension/openai_chatgpt_python/openai_chatgpt_extension.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index 2747eea47..30f943b21 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -308,7 +308,7 @@ def chat_completions_stream_worker(start_time, input_text, memory): start_time = get_current_time() thread = Thread(target=chat_completions_stream_worker, args=(start_time, input_text, self.memory)) thread.start() - thread.join() + print(f"OpenAIChatGPTExtension on_data end") # try: # rte_data = Data.create("text_data") # rte_data.set_property_string("text", "hello, world, who are you!") From c007a5408704cc223258873a34d7e3c0ac498f5c Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Sat, 13 Jul 2024 14:44:35 +0000 Subject: [PATCH 15/72] fix: tts not stop --- agents/addon/extension/azure_tts/src/tts.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/agents/addon/extension/azure_tts/src/tts.cc b/agents/addon/extension/azure_tts/src/tts.cc index 4bcd1ce7e..6da755445 100644 --- a/agents/addon/extension/azure_tts/src/tts.cc +++ b/agents/addon/extension/azure_tts/src/tts.cc @@ -36,18 +36,14 @@ bool AzureTTS::Start() { { std::unique_lock lk(tasks_mutex_); - tasks_cv_.wait(lk, [this]() { return !tasks_.empty(); }); - if (tasks_.empty()) { - continue; + tasks_cv_.wait(lk, [this]() { return stop_.load() || !tasks_.empty(); }); + if (stop_.load()) { + break; } task = std::move(tasks_.front()); tasks_.pop(); } - if (stop_.load()) { - break; - } - SpeechText(task->text, task->ts); } From b5062c88e58fb6cfada9e7ba5ab420cac6562266 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Sat, 13 Jul 2024 14:45:34 +0000 Subject: [PATCH 16/72] fix: api --- .../openai_chatgpt_python/manifest.json | 70 +++++++++---------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/agents/addon/extension/openai_chatgpt_python/manifest.json b/agents/addon/extension/openai_chatgpt_python/manifest.json index 671e6a29d..629395bf5 100644 --- a/agents/addon/extension/openai_chatgpt_python/manifest.json +++ b/agents/addon/extension/openai_chatgpt_python/manifest.json @@ -12,45 +12,43 @@ ], "api": { "property": { - "width": { - "type": "int32" + "api_key": { + "type": "string" }, - "height": { - "type": "int32" + "frequency_penalty": { + "type": "float64" + }, + "presence_penalty": { + "type": "float64" + }, + "temperature": { + "type": "float64" + }, + "top_p": { + "type": "float64" + }, + "model": { + "type": "string" + }, + "max_tokens": { + "type": "int64" + }, + "base_url": { + "type": "string" + }, + "prompt": { + "type": "string" + }, + "greeting": { + "type": "string" + }, + "proxy_url": { + "type": "string" + }, + "max_memory_length": { + "type": "int64" } }, - "image_frame_in": [ - { - "name": "image_frame", - "property": { - "timestamp": { - "type": "int64" - }, - "width": { - "type": "int32" - }, - "height": { - "type": "int32" - } - } - } - ], - "image_frame_out": [ - 
{ - "name": "image_frame", - "property": { - "timestamp": { - "type": "int64" - }, - "width": { - "type": "int32" - }, - "height": { - "type": "int32" - } - } - } - ], "data_in": [ { "name": "text_data", From f58b1ba717166928a90abe6c6b8bb280fbdc9cca Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sat, 13 Jul 2024 23:23:16 +0800 Subject: [PATCH 17/72] feat: add interrupter --- .../openai_chatgpt_python/manifest.json | 10 ++++++++ .../openai_chatgpt_extension.py | 14 +++++++++++ agents/manifest.json.example | 25 +++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/agents/addon/extension/openai_chatgpt_python/manifest.json b/agents/addon/extension/openai_chatgpt_python/manifest.json index 629395bf5..290b16299 100644 --- a/agents/addon/extension/openai_chatgpt_python/manifest.json +++ b/agents/addon/extension/openai_chatgpt_python/manifest.json @@ -68,6 +68,16 @@ } } } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } ] } } \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index 30f943b21..8e8a282be 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -195,6 +195,20 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_json = cmd.to_json() print("OpenAIChatGPTExtension on_cmd json: " + cmd_json) + cmd_name = cmd.get_name() + + if cmd_name == CMD_IN_FLUSH: + self.outdate_ts = get_current_time() + print(f"OpenAIChatGPTExtension on_cmd flush") + cmd_out = Cmd.create(CMD_OUT_FLUSH) + rte.send_cmd(cmd_out, None) + else: + print(f"OpenAIChatGPTExtension on_cmd unknown cmd: {cmd_name}") + cmd_result = CmdResult.create(StatusCode.ERROR) + cmd_result.set_property_string("detail", "unknown cmd") + rte.return_result(cmd_result, cmd) + return + cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") rte.return_result(cmd_result, cmd) diff --git a/agents/manifest.json.example b/agents/manifest.json.example index 69538cb0c..8b9f220d2 100644 --- a/agents/manifest.json.example +++ b/agents/manifest.json.example @@ -75,6 +75,12 @@ "azure_synthesis_voice_name": "en-US-JaneNeural" } }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector_python" + }, { "type": "extension_group", "addon": "default_extension_group", @@ -99,6 +105,10 @@ { "name": "text_data", "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector_python" + }, { "extension_group": "chatgpt", "extension": "openai_chatgpt" @@ -169,6 +179,21 @@ ] } ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector_python", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] } ] } From 2e1fecb47a2add064c5099b5ff29d3398d06a764 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 00:16:11 +0800 Subject: [PATCH 18/72] feat: add logs to openai ext --- .../openai_chatgpt_python/__init__.py | 3 +- .../extension/openai_chatgpt_python/log.py | 13 +++ .../openai_chatgpt_python/openai_chatgpt.py | 3 +- .../openai_chatgpt_extension.py | 95 ++++++++++--------- 4 files changed, 65 insertions(+), 49 deletions(-) create mode 100644 agents/addon/extension/openai_chatgpt_python/log.py diff --git 
a/agents/addon/extension/openai_chatgpt_python/__init__.py b/agents/addon/extension/openai_chatgpt_python/__init__.py index 1dc25495b..0b89ea8a7 100644 --- a/agents/addon/extension/openai_chatgpt_python/__init__.py +++ b/agents/addon/extension/openai_chatgpt_python/__init__.py @@ -1,3 +1,4 @@ from . import openai_chatgpt_extension +from .log import logger -print("openai_chatgpt_python extension loaded") +logger.info("openai_chatgpt_python extension loaded") diff --git a/agents/addon/extension/openai_chatgpt_python/log.py b/agents/addon/extension/openai_chatgpt_python/log.py new file mode 100644 index 000000000..fa2202da0 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("openai_chatgpt_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py index 2b885b6e9..5ad5b6cc9 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt.py @@ -2,6 +2,7 @@ import requests from openai import OpenAI from typing import List, Dict, Any, Optional +from .log import logger class OpenAIChatGPTConfig: @@ -50,7 +51,7 @@ class OpenAIChatGPT: client = None def __init__(self, config: OpenAIChatGPTConfig): self.config = config - print(f"OpenAIChatGPT initialized with config: {config.api_key}") + logger.info(f"OpenAIChatGPT initialized with config: {config.api_key}") self.client = OpenAI( api_key=config.api_key, base_url=config.base_url diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index 8e8a282be..75ce1c7a8 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -22,6 +22,7 @@ ) from rte_runtime_python.image_frame import ImageFrame from PIL import Image, ImageFilter +from .log import logger CMD_IN_FLUSH = "flush" @@ -78,11 +79,11 @@ class OpenAIChatGPTExtension(Extension): openai_chatgpt = None def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: - print("OpenAIChatGPTExtension on_init") + logger.info("OpenAIChatGPTExtension on_init") rte.on_init_done(manifest, property) def on_start(self, rte: Rte) -> None: - print("OpenAIChatGPTExtension on_start") + logger.info("OpenAIChatGPTExtension on_start") # Prepare configuration openai_chatgpt_config = OpenAIChatGPTConfig.default_config() @@ -91,13 +92,13 @@ def on_start(self, rte: Rte) -> None: if base_url: openai_chatgpt_config.base_url = base_url except Exception as err: - print(f"GetProperty required {PROPERTY_BASE_URL} failed, err: {err}") + logger.info(f"GetProperty required {PROPERTY_BASE_URL} failed, err: {err}") try: api_key = rte.get_property_string(PROPERTY_API_KEY) openai_chatgpt_config.api_key = api_key except Exception as err: - print(f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}") + logger.info(f"GetProperty required {PROPERTY_API_KEY} failed, err: {err}") return try: @@ -105,70 +106,70 @@ def on_start(self, rte: Rte) -> None: if model: 
openai_chatgpt_config.model = model except Exception as err: - print(f"GetProperty optional {PROPERTY_MODEL} error: {err}") + logger.info(f"GetProperty optional {PROPERTY_MODEL} error: {err}") try: prompt = rte.get_property_string(PROPERTY_PROMPT) if prompt: openai_chatgpt_config.prompt = prompt except Exception as err: - print(f"GetProperty optional {PROPERTY_PROMPT} error: {err}") + logger.info(f"GetProperty optional {PROPERTY_PROMPT} error: {err}") try: frequency_penalty = rte.get_property_float(PROPERTY_FREQUENCY_PENALTY) openai_chatgpt_config.frequency_penalty = float(frequency_penalty) except Exception as err: - print(f"GetProperty optional {PROPERTY_FREQUENCY_PENALTY} failed, err: {err}") + logger.info(f"GetProperty optional {PROPERTY_FREQUENCY_PENALTY} failed, err: {err}") try: presence_penalty = rte.get_property_float(PROPERTY_PRESENCE_PENALTY) openai_chatgpt_config.presence_penalty = float(presence_penalty) except Exception as err: - print(f"GetProperty optional {PROPERTY_PRESENCE_PENALTY} failed, err: {err}") + logger.info(f"GetProperty optional {PROPERTY_PRESENCE_PENALTY} failed, err: {err}") try: temperature = rte.get_property_float(PROPERTY_TEMPERATURE) openai_chatgpt_config.temperature = float(temperature) except Exception as err: - print(f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}") + logger.info(f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}") try: top_p = rte.get_property_float(PROPERTY_TOP_P) openai_chatgpt_config.top_p = float(top_p) except Exception as err: - print(f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}") + logger.info(f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}") try: max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS) if max_tokens > 0: openai_chatgpt_config.max_tokens = int(max_tokens) except Exception as err: - print(f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}") + logger.info(f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}") try: proxy_url = rte.get_property_string(PROPERTY_PROXY_URL) openai_chatgpt_config.proxy_url = proxy_url except Exception as err: - print(f"GetProperty optional {PROPERTY_PROXY_URL} failed, err: {err}") + logger.info(f"GetProperty optional {PROPERTY_PROXY_URL} failed, err: {err}") try: greeting = rte.get_property_string(PROPERTY_GREETING) except Exception as err: - print(f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}") + logger.info(f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}") try: prop_max_memory_length = rte.get_property_int(PROPERTY_MAX_MEMORY_LENGTH) if prop_max_memory_length > 0: self.max_memory_length = int(prop_max_memory_length) except Exception as err: - print(f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}") + logger.info(f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}") # Create openaiChatGPT instance try: self.openai_chatgpt = OpenAIChatGPT(openai_chatgpt_config) - print(f"newOpenaiChatGPT succeed with max_tokens: {openai_chatgpt_config.max_tokens}, model: {openai_chatgpt_config.model}") + logger.info(f"newOpenaiChatGPT succeed with max_tokens: {openai_chatgpt_config.max_tokens}, model: {openai_chatgpt_config.model}") except Exception as err: - print(f"newOpenaiChatGPT failed, err: {err}") + logger.info(f"newOpenaiChatGPT failed, err: {err}") # Send greeting if available if greeting: @@ -177,33 +178,33 @@ def on_start(self, rte: Rte) -> None: output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) 
output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) rte.send_data(output_data) - print(f"greeting [{greeting}] sent") + logger.info(f"greeting [{greeting}] sent") except Exception as err: - print(f"greeting [{greeting}] send failed, err: {err}") + logger.info(f"greeting [{greeting}] send failed, err: {err}") rte.on_start_done() def on_stop(self, rte: Rte) -> None: - print("OpenAIChatGPTExtension on_stop") + logger.info("OpenAIChatGPTExtension on_stop") rte.on_stop_done() def on_deinit(self, rte: Rte) -> None: - print("OpenAIChatGPTExtension on_deinit") + logger.info("OpenAIChatGPTExtension on_deinit") rte.on_deinit_done() def on_cmd(self, rte: Rte, cmd: Cmd) -> None: - print("OpenAIChatGPTExtension on_cmd") + logger.info("OpenAIChatGPTExtension on_cmd") cmd_json = cmd.to_json() - print("OpenAIChatGPTExtension on_cmd json: " + cmd_json) + logger.info("OpenAIChatGPTExtension on_cmd json: " + cmd_json) cmd_name = cmd.get_name() if cmd_name == CMD_IN_FLUSH: self.outdate_ts = get_current_time() - print(f"OpenAIChatGPTExtension on_cmd flush") cmd_out = Cmd.create(CMD_OUT_FLUSH) rte.send_cmd(cmd_out, None) + logger.info(f"OpenAIChatGPTExtension on_cmd sent flush") else: - print(f"OpenAIChatGPTExtension on_cmd unknown cmd: {cmd_name}") + logger.info(f"OpenAIChatGPTExtension on_cmd unknown cmd: {cmd_name}") cmd_result = CmdResult.create(StatusCode.ERROR) cmd_result.set_property_string("detail", "unknown cmd") rte.return_result(cmd_result, cmd) @@ -214,7 +215,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: rte.return_result(cmd_result, cmd) def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: - print("OpenAIChatGPTExtension on_cmd") + logger.info("OpenAIChatGPTExtension on_image_frame") def on_data(self, rte: Rte, data: Data) -> None: """ @@ -224,27 +225,27 @@ def on_data(self, rte: Rte, data: Data) -> None: example: {name: text_data, properties: {text: "hello"} """ - print(f"OpenAIChatGPTExtension on_data") + logger.info(f"OpenAIChatGPTExtension on_data") # Assume 'data' is an object from which we can get properties try: is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) if not is_final: - print("ignore non-final input") + logger.info("ignore non-final input") return except Exception as err: - print(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") + logger.info(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") return # Get input text try: input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) if not input_text: - print("ignore empty text") + logger.info("ignore empty text") return - print(f"OnData input text: [{input_text}]") + logger.info(f"OnData input text: [{input_text}]") except Exception as err: - print(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") + logger.info(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") return # Prepare memory @@ -255,12 +256,12 @@ def on_data(self, rte: Rte, data: Data) -> None: def chat_completions_stream_worker(start_time, input_text, memory): try: - print(f"GetChatCompletionsStream for input text: [{input_text}] memory: {memory}") + logger.info(f"GetChatCompletionsStream for input text: [{input_text}] memory: {memory}") # Get result from AI resp = self.openai_chatgpt.get_chat_completions_stream(memory) if resp is None: - print(f"GetChatCompletionsStream for input text: [{input_text}] failed") + logger.info(f"GetChatCompletionsStream for input text: [{input_text}] 
failed") return sentence = "" @@ -269,7 +270,7 @@ def chat_completions_stream_worker(start_time, input_text, memory): for chat_completions in resp: if start_time < self.outdate_ts: - print(f"GetChatCompletionsStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}") + logger.info(f"GetChatCompletionsStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}") break if len(chat_completions.choices) > 0 and chat_completions.choices[0].delta.content is not None: @@ -282,9 +283,9 @@ def chat_completions_stream_worker(start_time, input_text, memory): while True: sentence, content, sentence_is_final = parse_sentence(sentence, content) if len(sentence) == 0 or not sentence_is_final: - print(f"sentence {sentence} is empty or not final") + logger.info(f"sentence {sentence} is empty or not final") break - print(f"GetChatCompletionsStream recv for input text: [{input_text}] got sentence: [{sentence}]") + logger.info(f"GetChatCompletionsStream recv for input text: [{input_text}] got sentence: [{sentence}]") # send sentence try: @@ -292,15 +293,15 @@ def chat_completions_stream_worker(start_time, input_text, memory): output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False) rte.send_data(output_data) - print(f"GetChatCompletionsStream recv for input text: [{input_text}] sent sentence [{sentence}]") + logger.info(f"GetChatCompletionsStream recv for input text: [{input_text}] sent sentence [{sentence}]") except Exception as err: - print(f"GetChatCompletionsStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}") + logger.info(f"GetChatCompletionsStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}") break sentence = "" if not first_sentence_sent: first_sentence_sent = True - print(f"GetChatCompletionsStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms") + logger.info(f"GetChatCompletionsStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms") # remember response as assistant content in memory memory.append({"role": "assistant", "content": full_content}) @@ -311,23 +312,23 @@ def chat_completions_stream_worker(start_time, input_text, memory): output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) rte.send_data(output_data) - print(f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent") + logger.info(f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent") except Exception as err: - print(f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}") + logger.info(f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}") except Exception as e: - print(f"GetChatCompletionsStream for input text: [{input_text}] failed, err: {e}") + logger.info(f"GetChatCompletionsStream for input text: [{input_text}] failed, err: {e}") # Start thread to request and read responses from OpenAI start_time = get_current_time() thread = 
Thread(target=chat_completions_stream_worker, args=(start_time, input_text, self.memory)) thread.start() - print(f"OpenAIChatGPTExtension on_data end") + logger.info(f"OpenAIChatGPTExtension on_data end") # try: # rte_data = Data.create("text_data") # rte_data.set_property_string("text", "hello, world, who are you!") # except Exception as e: - # print(f"on_data new_data error, ", e) + # logger.info(f"on_data new_data error, ", e) # return # rte.send_data(rte_data) @@ -336,15 +337,15 @@ def chat_completions_stream_worker(start_time, input_text, memory): @register_addon_as_extension("openai_chatgpt_python") class OpenAIChatGPTExtensionAddon(Addon): def on_init(self, rte: Rte, manifest, property) -> None: - print("OpenAIChatGPTExtensionAddon on_init") + logger.info("OpenAIChatGPTExtensionAddon on_init") rte.on_init_done(manifest, property) return def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: - print("on_create_instance") + logger.info("on_create_instance") rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context) def on_deinit(self, rte: Rte) -> None: - print("OpenAIChatGPTExtensionAddon on_deinit") + logger.info("OpenAIChatGPTExtensionAddon on_deinit") rte.on_deinit_done() return From e84ea1b3692e7bb915d87a883e057a92a7a9d9b4 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 00:19:31 +0800 Subject: [PATCH 19/72] fix: sync manifest --- agents/manifest.json.example | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agents/manifest.json.example b/agents/manifest.json.example index 8b9f220d2..8494b15c1 100644 --- a/agents/manifest.json.example +++ b/agents/manifest.json.example @@ -79,7 +79,7 @@ "type": "extension", "extension_group": "default", "addon": "interrupt_detector_python", - "name": "interrupt_detector_python" + "name": "interrupt_detector" }, { "type": "extension_group", @@ -107,7 +107,7 @@ "dest": [ { "extension_group": "default", - "extension": "interrupt_detector_python" + "extension": "interrupt_detector" }, { "extension_group": "chatgpt", @@ -182,7 +182,7 @@ }, { "extension_group": "default", - "extension": "interrupt_detector_python", + "extension": "interrupt_detector", "cmd": [ { "name": "flush", From ad6935831ff834d52efd28ce391b2de505634995 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Sun, 14 Jul 2024 02:03:46 +0000 Subject: [PATCH 20/72] feat: add --manifest --- agents/bin/start | 2 +- agents/main.py | 44 ++++++++++++++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/agents/bin/start b/agents/bin/start index 27d563949..68bb9e4e1 100755 --- a/agents/bin/start +++ b/agents/bin/start @@ -34,4 +34,4 @@ export PYTHONPATH=lib:interface export RTE_HOME=. 
# export PYTHONMALLOC=malloc -python3 main.py +python3 main.py "$@" diff --git a/agents/main.py b/agents/main.py index 9b8c2908a..4a4d297cc 100644 --- a/agents/main.py +++ b/agents/main.py @@ -8,27 +8,23 @@ from glob import glob import importlib.util import os +import argparse from os.path import dirname -from rte_runtime_python import ( - App, -) - def log(msg): print("[PYTHON] {}".format(msg)) - -class TestApp(App): - def on_init(self, rte, manifest, property): - log("app on_init") - rte.on_init_done(manifest, property) - - def on_deinit(self, rte) -> None: - log("app on_deinit") - rte.on_deinit_done() - +def process_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--manifest", help="The absolute path of manifest.json" + ) + return parser.parse_args() if __name__ == "__main__": + args = process_args() basedir = dirname(__file__) log("app init") @@ -45,7 +41,27 @@ def on_deinit(self, rte) -> None: ) print("imported module: {}".format(module_name)) + from rte_runtime_python import App, MetadataType + class TestApp(App): + def on_init(self, rte, manifest, property): + log("app on_init") + + # Using the default manifest.json if not specified. + if self.manifest_path: + log("set manifest: {}".format(self.manifest_path)) + manifest.set(MetadataType.JSON_FILENAME, self.manifest_path) + + rte.on_init_done(manifest, property) + + def on_deinit(self, rte) -> None: + log("app on_deinit") + rte.on_deinit_done() + + def set_manifest_path(self, manifest_path): + self.manifest_path = manifest_path + app = TestApp() + app.set_manifest_path(args.manifest) log("app created") app.run(False) From 551911baccbad520110dd60bae55fd28df50e4db Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 10:49:03 +0800 Subject: [PATCH 21/72] tmp: tmp cmd test --- .../interrupt_detector_extension.py | 2 +- .../openai_chatgpt_extension.py | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py index cbb751f36..d1e388a95 100644 --- a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py @@ -84,7 +84,7 @@ def on_data(self, rte: Rte, data: Data) -> None: if final or len(text) >= 2: flush_cmd = Cmd.create(CMD_NAME_FLUSH) - rte.send_cmd(flush_cmd, None) + rte.send_cmd(flush_cmd, lambda rte, result: print("DefaultExtension send_cmd done")) logger.info(f"sent cmd: {CMD_NAME_FLUSH}") diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index 75ce1c7a8..ac2fd66ea 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -198,17 +198,17 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_name = cmd.get_name() - if cmd_name == CMD_IN_FLUSH: - self.outdate_ts = get_current_time() - cmd_out = Cmd.create(CMD_OUT_FLUSH) - rte.send_cmd(cmd_out, None) - logger.info(f"OpenAIChatGPTExtension on_cmd sent flush") - else: - logger.info(f"OpenAIChatGPTExtension on_cmd unknown cmd: {cmd_name}") - cmd_result = CmdResult.create(StatusCode.ERROR) - cmd_result.set_property_string("detail", "unknown cmd") - rte.return_result(cmd_result, 
cmd) - return + # if cmd_name == CMD_IN_FLUSH: + # self.outdate_ts = get_current_time() + # cmd_out = Cmd.create(CMD_OUT_FLUSH) + # rte.send_cmd(cmd_out, None) + # logger.info(f"OpenAIChatGPTExtension on_cmd sent flush") + # else: + # logger.info(f"OpenAIChatGPTExtension on_cmd unknown cmd: {cmd_name}") + # cmd_result = CmdResult.create(StatusCode.ERROR) + # cmd_result.set_property_string("detail", "unknown cmd") + # rte.return_result(cmd_result, cmd) + # return cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") From 4bbe3e14157b42e3935503f120efbe3ce21d549d Mon Sep 17 00:00:00 2001 From: tomasliu <67892682+tomasliu-agora@users.noreply.github.com> Date: Sun, 14 Jul 2024 11:23:45 +0800 Subject: [PATCH 22/72] Feature/python experimental (#97) * init cosy tts and qwen llm * add flush and async * fix flush * revert change on interrupt --- agents/addon/extension/cosy_tts/__init__.py | 3 + agents/addon/extension/cosy_tts/main.py | 177 ++++++++++++++ agents/addon/extension/cosy_tts/manifest.json | 54 +++++ agents/addon/extension/cosy_tts/property.json | 1 + .../addon/extension/cosy_tts/requirements.txt | 1 + .../extension/qwen_llm_python/__init__.py | 3 + .../addon/extension/qwen_llm_python/main.py | 219 ++++++++++++++++++ .../extension/qwen_llm_python/manifest.json | 71 ++++++ .../extension/qwen_llm_python/property.json | 1 + .../qwen_llm_python/requirements.txt | 1 + 10 files changed, 531 insertions(+) create mode 100644 agents/addon/extension/cosy_tts/__init__.py create mode 100644 agents/addon/extension/cosy_tts/main.py create mode 100644 agents/addon/extension/cosy_tts/manifest.json create mode 100644 agents/addon/extension/cosy_tts/property.json create mode 100644 agents/addon/extension/cosy_tts/requirements.txt create mode 100644 agents/addon/extension/qwen_llm_python/__init__.py create mode 100644 agents/addon/extension/qwen_llm_python/main.py create mode 100644 agents/addon/extension/qwen_llm_python/manifest.json create mode 100644 agents/addon/extension/qwen_llm_python/property.json create mode 100644 agents/addon/extension/qwen_llm_python/requirements.txt diff --git a/agents/addon/extension/cosy_tts/__init__.py b/agents/addon/extension/cosy_tts/__init__.py new file mode 100644 index 000000000..4fece02f2 --- /dev/null +++ b/agents/addon/extension/cosy_tts/__init__.py @@ -0,0 +1,3 @@ +from . import main + +print("cosy_tts extension loaded") diff --git a/agents/addon/extension/cosy_tts/main.py b/agents/addon/extension/cosy_tts/main.py new file mode 100644 index 000000000..b65304522 --- /dev/null +++ b/agents/addon/extension/cosy_tts/main.py @@ -0,0 +1,177 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. 
+# +# +from rte_runtime_python import ( + Addon, + Extension, + register_addon_as_extension, + Rte, + Cmd, + PcmFrame, + RTE_PCM_FRAME_DATA_FMT, + Data, + StatusCode, + CmdResult, + MetadataInfo, + RTE_PIXEL_FMT, +) +from typing import List, Any +import dashscope +from dashscope.audio.tts_v2 import ResultCallback, SpeechSynthesizer, AudioFormat + +class CosyTTSCallback(ResultCallback): + _player = None + _stream = None + + def __init__(self, rte: Rte, sample_rate: int): + super().__init__() + self.rte = rte + self.sample_rate = sample_rate + self.frame_size = int(self.sample_rate * 1 * 2 / 100) + + def on_open(self): + print("websocket is open.") + + def on_complete(self): + print("speech synthesis task complete successfully.") + + def on_error(self, message: str): + print(f"speech synthesis task failed, {message}") + + def on_close(self): + print("websocket is closed.") + + def on_event(self, message): + print(f"recv speech synthesis message {message}") + + def get_frame(self, data: bytes) -> PcmFrame: + f = PcmFrame.create("pcm_frame") + f.set_sample_rate = self.sample_rate + f.set_bytes_per_sample = 2 + f.set_number_of_channels = 1 + f.set_timestamp = 0 + f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_NON_INTERLEAVE) + f.set_samples_per_channel = self.sample_rate / 100 + f.alloc_buf(len(data)) + buff = f.lock_buf() + buff[:] = data + f.unlock_buf(buff) + return f + + def on_data(self, data: bytes) -> None: + print("audio result length:", len(data), self.frame_size) + try: + chunk = int(len(data) / self.frame_size) + offset = 0 + for i in range(0, chunk): + #print("****", i, offset, self.frame_size) + f = self.get_frame(data[offset:offset + self.frame_size]) + self.rte.send_pcm_frame(f) + #print("send pcm chunk", i) + offset += self.frame_size + + if offset < len(data): + #print("-----") + size = len(data) - offset + f = self.get_frame(data[offset:offset+size]) + self.rte.send_pcm_frame(f) + #print("send last pcm chunk") + except Exception as e: + print("exception:", e) + +class CosyTTSExtension(Extension): + def __init__(self, name: str): + super().__init__(name) + self.api_key = "" + self.voice = "" + self.model = "" + self.sample_rate = 16000 + self.tts = None + self.callback = None + + def on_msg(self, msg: str): + print("on message", msg) + self.tts.streaming_call(msg) + + def on_init( + self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo + ) -> None: + print("CosyTTSExtension on_init") + rte.on_init_done(manifest, property) + + def on_start(self, rte: Rte) -> None: + print("CosyTTSExtension on_start") + self.api_key = rte.get_property_string("api_key") + self.voice = rte.get_property_string("voice") + self.model = rte.get_property_string("model") + self.sample_rate = rte.get_property_int("sample_rate") + + dashscope.api_key = self.api_key + f = AudioFormat.PCM_16000HZ_MONO_16BIT + if self.sample_rate == 8000: + f = AudioFormat.PCM_8000HZ_MONO_16BIT + elif self.sample_rate == 16000: + f = AudioFormat.PCM_16000HZ_MONO_16BIT + elif self.sample_rate == 22050: + f = AudioFormat.PCM_22050HZ_MONO_16BIT + elif self.sample_rate == 24000: + f = AudioFormat.PCM_24000HZ_MONO_16BIT + elif self.sample_rate == 44100: + f = AudioFormat.PCM_44100HZ_MONO_16BIT + elif self.sample_rate == 48000: + f = AudioFormat.PCM_48000HZ_MONO_16BIT + else: + print("unknown sample rate", self.sample_rate) + exit() + + self.callback = CosyTTSCallback(rte, self.sample_rate) + self.tts = SpeechSynthesizer(model=self.model, voice=self.voice, format=f, callback=self.callback) + 
rte.on_start_done() + + def on_stop(self, rte: Rte) -> None: + print("CosyTTSExtension on_stop") + self.tts.streaming_complete() + rte.on_stop_done() + + def on_deinit(self, rte: Rte) -> None: + print("CosyTTSExtension on_deinit") + rte.on_deinit_done() + + def on_data(self, rte: Rte, data: Data) -> None: + print("CosyTTSExtension on_data") + inputText = data.get_property_string("text") + if len(inputText) == 0: + print("ignore empty text") + return + + print("on data", inputText) + self.on_msg(inputText) + + def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + print("CosyTTSExtension on_cmd") + cmd_json = cmd.to_json() + print("CosyTTSExtension on_cmd json: " + cmd_json) + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + rte.return_result(cmd_result, cmd) + +@register_addon_as_extension("cosy_tts") +class CosyTTSExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + print("CosyTTSExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str) -> Extension: + print("CosyTTSExtensionAddon on_create_instance") + return CosyTTSExtension(addon_name) + + def on_deinit(self, rte: Rte) -> None: + print("CosyTTSExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/cosy_tts/manifest.json b/agents/addon/extension/cosy_tts/manifest.json new file mode 100644 index 000000000..09067fe81 --- /dev/null +++ b/agents/addon/extension/cosy_tts/manifest.json @@ -0,0 +1,54 @@ +{ + "type": "extension", + "name": "cosy_tts", + "version": "0.2.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "voice": { + "type": "string" + }, + "model": { + "type": "string" + }, + "sample_rate": { + "type": "int64" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ], + "pcm_frame_out": [ + { + "name": "pcm_frame" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/cosy_tts/property.json b/agents/addon/extension/cosy_tts/property.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/agents/addon/extension/cosy_tts/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/cosy_tts/requirements.txt b/agents/addon/extension/cosy_tts/requirements.txt new file mode 100644 index 000000000..f1c09c9e2 --- /dev/null +++ b/agents/addon/extension/cosy_tts/requirements.txt @@ -0,0 +1 @@ +dashscope==1.20.0 \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/__init__.py b/agents/addon/extension/qwen_llm_python/__init__.py new file mode 100644 index 000000000..d3832c27a --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/__init__.py @@ -0,0 +1,3 @@ +from . import main + +print("qwen_llm_python extension loaded") diff --git a/agents/addon/extension/qwen_llm_python/main.py b/agents/addon/extension/qwen_llm_python/main.py new file mode 100644 index 000000000..dad5da1ba --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/main.py @@ -0,0 +1,219 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. 
+# +# +from rte_runtime_python import ( + Addon, + Extension, + register_addon_as_extension, + Rte, + Cmd, + Data, + StatusCode, + CmdResult, + MetadataInfo, + RTE_PIXEL_FMT, +) +from rte_runtime_python.image_frame import ImageFrame +from typing import List, Any +import dashscope +import queue +from datetime import datetime +import threading +from http import HTTPStatus + +def isEnd(content: str) -> bool: + last = content[len(content)-1] + return last == ',' or last == ',' or \ + last == '.' or last == '。' or \ + last == '?' or last == '?' or \ + last == '!' or last == '!' + +class QWenLLMExtension(Extension): + def __init__(self, name: str): + super().__init__(name) + self.history = [] + self.api_key = "" + self.model = "" + self.prompt = "" + self.max_history = 10 + self.stopped = False + self.thread = None + self.outdateTs = datetime.now() + + self.queue = queue.Queue() + self.mutex = threading.Lock() + + def on_msg(self, role: str, content: str) -> None: + self.mutex.acquire() + try: + self.history.append({'role': role, 'content': content}) + if len(self.history) > self.max_history: + self.history = self.history[1:] + finally: + self.mutex.release() + + def get_messages(self) -> List[Any]: + messages = [] + if len(self.prompt) > 0: + messages.append({'role': 'system', 'content': self.prompt}) + self.mutex.acquire() + try: + for h in self.history: + messages.append(h) + finally: + self.mutex.release() + return messages + + def call(self, messages: List[Any]): + print("before call", messages) + response = dashscope.Generation.call("qwen-max", + messages=messages, + result_format='message', # set the result to be "message" format. + stream=False, # set streaming output + incremental_output=False # get streaming output incrementally + ) + if response.status_code == HTTPStatus.OK: + self.on_msg(response.output.choices[0]['message']['role'], response.output.choices[0]['message']['content']) + print("on response", response.output.choices[0]['message']['content']) + else: + print("Failed to get response", response) + + def call_with_stream(self, rte: Rte, ts :datetime.time, messages: List[Any]): + print("before call", messages) + if self.outdateTs > ts: + return + responses = dashscope.Generation.call("qwen-max", + messages=messages, + result_format='message', # set the result to be "message" format. 
+ stream=True, # set streaming output + incremental_output=True # get streaming output incrementally + ) + total = "" + partial = "" + for response in responses: + if self.outdateTs > ts: + return + if response.status_code == HTTPStatus.OK: + temp = response.output.choices[0]['message']['content'] + partial += temp + if isEnd(temp): + d = Data.create("text_data") + d.set_property_bool("end_of_segment", isEnd(partial)) + d.set_property_string("text", partial) + rte.send_data(d) + total += partial + partial = "" + else: + print('Request id: %s, Status code: %s, error code: %s, error message: %s' % ( + response.request_id, response.status_code, + response.code, response.message + )) + return + if len(partial) > 0: + d = Data.create("text_data") + d.set_property_bool("end_of_segment", True) + d.set_property_string("text", partial) + rte.send_data(d) + total += partial + partial = "" + self.on_msg("assistant", total) + print("on response", total) + + def on_init( + self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo + ) -> None: + print("QWenLLMExtension on_init") + rte.on_init_done(manifest, property) + + def on_start(self, rte: Rte) -> None: + print("QWenLLMExtension on_start") + self.api_key = rte.get_property_string("api_key") + self.model = rte.get_property_string("model") + self.prompt = rte.get_property_string("prompt") + self.max_history = rte.get_property_int("max_memory_length") + + dashscope.api_key = self.api_key + self.thread = threading.Thread(target=self.async_handle, args=[rte]) + self.thread.start() + rte.on_start_done() + + def on_stop(self, rte: Rte) -> None: + print("QWenLLMExtension on_stop") + self.stopped = True + self.flush() + self.thread.join() + rte.on_stop_done() + + def on_deinit(self, rte: Rte) -> None: + print("QWenLLMExtension on_deinit") + rte.on_deinit_done() + + def flush(self): + print("QWenLLMExtension flush") + while not self.queue.empty(): + self.queue.get() + + def on_data(self, rte: Rte, data: Data) -> None: + print("QWenLLMExtension on_data") + is_final = data.get_property_bool("is_final") + if not is_final: + print("ignore non final") + return + + inputText = data.get_property_string("text") + if len(inputText) == 0: + print("ignore empty text") + return + + ts = datetime.now() + + print("on data ", inputText, ts) + self.queue.put((inputText, ts)) + + def async_handle(self, rte: Rte): + while not self.stopped: + inputText, ts = self.queue.get() + if self.outdateTs > ts: + continue + print("fetch from queue", inputText) + self.on_msg("user", inputText) + messages = self.get_messages() + self.call_with_stream(rte, ts, messages) + + def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + print("QWenLLMExtension on_cmd") + cmd_json = cmd.to_json() + print("QWenLLMExtension on_cmd json: " + cmd_json) + + cmdName = cmd.get_name() + if cmdName == "flush": + self.outdateTs = datetime.now() + self.flush() + else: + print("unknown cmd", cmdName) + + cmd_result = CmdResult.create(StatusCode.OK) + rte.return_result(cmd_result, cmd) + + def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: + print("QWenLLMExtension on_image_frame") + +@register_addon_as_extension("qwen_llm_python") +class QWenLLMExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + print("QWenLLMExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str) -> Extension: + print("QWenLLMExtensionAddon on_create_instance") + return QWenLLMExtension(addon_name) + + def on_deinit(self, rte: 
Rte) -> None: + print("QWenLLMExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/qwen_llm_python/manifest.json b/agents/addon/extension/qwen_llm_python/manifest.json new file mode 100644 index 000000000..c226e41d0 --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/manifest.json @@ -0,0 +1,71 @@ +{ + "type": "extension", + "name": "qwen_llm_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "model": { + "type": "string" + }, + "max_tokens": { + "type": "int64" + }, + "prompt": { + "type": "string" + }, + "greeting": { + "type": "string" + }, + "max_memory_length": { + "type": "int64" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + } + } + } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "end_of_segment": { + "type": "bool" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ] +} +} \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/property.json b/agents/addon/extension/qwen_llm_python/property.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/requirements.txt b/agents/addon/extension/qwen_llm_python/requirements.txt new file mode 100644 index 000000000..f1c09c9e2 --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/requirements.txt @@ -0,0 +1 @@ +dashscope==1.20.0 \ No newline at end of file From 9308a7c03bc51500738308326bf35ee8d76e9cca Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Sun, 14 Jul 2024 03:45:29 +0000 Subject: [PATCH 23/72] feat: default to ghcr.io image --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 25c722927..0482866c9 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,7 +2,7 @@ // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile { "name": "astra", - "image": "agoraio/astra_agents_build", + "image": "ghcr.io/rte-design/astra_agents_build", "customizations": { "vscode": { From a6559a324d4edb3aee99b9c9bb66f9d8bc164bd1 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 12:14:42 +0800 Subject: [PATCH 24/72] fix: fix interrupt crash --- agents/addon/extension/azure_tts/src/main.cc | 2 +- agents/addon/extension/cosy_tts/log.py | 13 +++++++++++ agents/addon/extension/cosy_tts/main.py | 7 +++--- .../openai_chatgpt_extension.py | 22 +++++++++---------- 4 files changed, 29 insertions(+), 15 deletions(-) create mode 100644 agents/addon/extension/cosy_tts/log.py diff --git a/agents/addon/extension/azure_tts/src/main.cc b/agents/addon/extension/azure_tts/src/main.cc index 5e57bbe59..d716b5b08 100644 --- a/agents/addon/extension/azure_tts/src/main.cc +++ b/agents/addon/extension/azure_tts/src/main.cc @@ -168,6 +168,6 @@ class azure_tts_extension_t : public rte::extension_t { const std::string kDataFieldText{"text"}; }; -RTE_CXX_REGISTER_ADDON_AS_EXTENSION(azure_tts, azure_tts_extension_t); +RTE_CPP_REGISTER_ADDON_AS_EXTENSION(azure_tts, 
azure_tts_extension_t); } // namespace azure_tts_extension diff --git a/agents/addon/extension/cosy_tts/log.py b/agents/addon/extension/cosy_tts/log.py new file mode 100644 index 000000000..83e895964 --- /dev/null +++ b/agents/addon/extension/cosy_tts/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("COSY_TTS") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/cosy_tts/main.py b/agents/addon/extension/cosy_tts/main.py index b65304522..6e6b16b82 100644 --- a/agents/addon/extension/cosy_tts/main.py +++ b/agents/addon/extension/cosy_tts/main.py @@ -22,6 +22,7 @@ from typing import List, Any import dashscope from dashscope.audio.tts_v2 import ResultCallback, SpeechSynthesizer, AudioFormat +from .log import logger class CosyTTSCallback(ResultCallback): _player = None @@ -167,9 +168,9 @@ def on_init(self, rte: Rte, manifest, property) -> None: rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str) -> Extension: - print("CosyTTSExtensionAddon on_create_instance") - return CosyTTSExtension(addon_name) + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + rte.on_create_instance_done(CosyTTSExtension(addon_name), context) def on_deinit(self, rte: Rte) -> None: print("CosyTTSExtensionAddon on_deinit") diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index ac2fd66ea..75ce1c7a8 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -198,17 +198,17 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_name = cmd.get_name() - # if cmd_name == CMD_IN_FLUSH: - # self.outdate_ts = get_current_time() - # cmd_out = Cmd.create(CMD_OUT_FLUSH) - # rte.send_cmd(cmd_out, None) - # logger.info(f"OpenAIChatGPTExtension on_cmd sent flush") - # else: - # logger.info(f"OpenAIChatGPTExtension on_cmd unknown cmd: {cmd_name}") - # cmd_result = CmdResult.create(StatusCode.ERROR) - # cmd_result.set_property_string("detail", "unknown cmd") - # rte.return_result(cmd_result, cmd) - # return + if cmd_name == CMD_IN_FLUSH: + self.outdate_ts = get_current_time() + cmd_out = Cmd.create(CMD_OUT_FLUSH) + rte.send_cmd(cmd_out, None) + logger.info(f"OpenAIChatGPTExtension on_cmd sent flush") + else: + logger.info(f"OpenAIChatGPTExtension on_cmd unknown cmd: {cmd_name}") + cmd_result = CmdResult.create(StatusCode.ERROR) + cmd_result.set_property_string("detail", "unknown cmd") + rte.return_result(cmd_result, cmd) + return cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") From 04f6d88175f461b75eea1de58144ca98d1ead1cd Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 14:27:02 +0800 Subject: [PATCH 25/72] fix: fix cosy --- agents/addon/extension/cosy_tts/main.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/agents/addon/extension/cosy_tts/main.py b/agents/addon/extension/cosy_tts/main.py index 6e6b16b82..ca66e26ae 100644 --- a/agents/addon/extension/cosy_tts/main.py +++ b/agents/addon/extension/cosy_tts/main.py @@ -51,12 
+51,12 @@ def on_event(self, message): def get_frame(self, data: bytes) -> PcmFrame: f = PcmFrame.create("pcm_frame") - f.set_sample_rate = self.sample_rate - f.set_bytes_per_sample = 2 - f.set_number_of_channels = 1 - f.set_timestamp = 0 - f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_NON_INTERLEAVE) - f.set_samples_per_channel = self.sample_rate / 100 + f.set_sample_rate(self.sample_rate) + f.set_bytes_per_sample(2) + f.set_number_of_channels(1) + # f.set_timestamp = 0 + f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_INTERLEAVE) + f.set_samples_per_channel(self.sample_rate // 100) f.alloc_buf(len(data)) buff = f.lock_buf() buff[:] = data From 2f373589893bbc8e232290efb17a221783ec0d97 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 14:52:00 +0800 Subject: [PATCH 26/72] feat: cn manifest example --- agents/manifest.json.cn.example | 202 ++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 agents/manifest.json.cn.example diff --git a/agents/manifest.json.cn.example b/agents/manifest.json.cn.example new file mode 100644 index 000000000..8a8f6b1e6 --- /dev/null +++ b/agents/manifest.json.cn.example @@ -0,0 +1,202 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "python", + "dependencies": [ + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.2.0" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.2.1" + }, + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt_python", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "", + "frequency_penalty": 0.9, + "model": "gpt-3.5-turbo", + "max_tokens": 512, + "prompt": "", + "proxy_url": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cosy_tts", + "name": "cosy_tts", + "property": { + "api_key": "", + "model": "cosyvoice-v1", + "voice": "longxiaochun", + "sample_rate": 16000 + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cosy_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + } + ] +} From 9f1b06ea4d475dec37af1b3d9b780bade86558d6 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 17:22:40 +0800 Subject: [PATCH 27/72] feat: support python manifest file --- .gitignore | 2 + Dockerfile | 2 + agents/manifest.json.en.example | 201 ++++++++++++++++++++++++++++++++ server/internal/http_server.go | 13 ++- server/internal/worker.go | 2 +- server/main.go | 11 +- 6 files changed, 225 insertions(+), 6 deletions(-) create mode 100644 agents/manifest.json.en.example diff --git a/.gitignore b/.gitignore index b08f1f735..2d8837829 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,8 @@ lib/ lib64 agents/manifest.json agents/manifest.elevenlabs.json +agents/manifest.cn.json +agents/manifest.en.json !agents/addon/manifest.json node_modules/ /out/ diff --git a/Dockerfile b/Dockerfile index 70b322943..c4bebb0d0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,8 @@ WORKDIR /app COPY . . 
COPY agents/manifest.json.example agents/manifest.json COPY agents/manifest.json.elevenlabs.example agents/manifest.elevenlabs.json +COPY agents/manifest.json.cn.example agents/manifest.cn.json +COPY agents/manifest.json.en.example agents/manifest.en.json COPY agents/${SESSION_CONTROL_CONF} agents/session_control.conf RUN make build && \ diff --git a/agents/manifest.json.en.example b/agents/manifest.json.en.example new file mode 100644 index 000000000..8494b15c1 --- /dev/null +++ b/agents/manifest.json.en.example @@ -0,0 +1,201 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "python", + "dependencies": [ + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.2.0" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.2.1" + }, + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "chatgpt", + "addon": "openai_chatgpt_python", + "name": "openai_chatgpt", + "property": { + "base_url": "", + "api_key": "", + "frequency_penalty": 0.9, + "model": "gpt-3.5-turbo", + "max_tokens": 512, + "prompt": "", + "proxy_url": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "", + "azure_subscription_region": "", + "azure_synthesis_voice_name": "en-US-JaneNeural" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chatgpt" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] + } + ] + } + ] +} diff --git a/server/internal/http_server.go b/server/internal/http_server.go index cf49a94f0..eec7a8282 100644 --- a/server/internal/http_server.go +++ b/server/internal/http_server.go @@ -74,6 +74,8 @@ const ( ManifestJsonFile = "./agents/manifest.json" ManifestJsonFileElevenlabs = "./agents/manifest.elevenlabs.json" + ManifestJsonFileEN = "./agents/manifest.en.json" + ManifestJsonFileCN = "./agents/manifest.cn.json" TTSVendorAzure = "azure" TTSVendorElevenlabs = "elevenlabs" @@ -116,12 +118,17 @@ func NewHttpServer(httpServerConfig *HttpServerConfig) *HttpServer { } func (s *HttpServer) getManifestJsonFile(language string) (manifestJsonFile string) { - ttsVendor := s.getTtsVendor(language) + // ttsVendor := s.getTtsVendor(language) manifestJsonFile = ManifestJsonFile - if ttsVendor == TTSVendorElevenlabs { - manifestJsonFile = ManifestJsonFileElevenlabs + if language == languageEnglish { + manifestJsonFile = ManifestJsonFileEN + } else if language == languageChinese { + manifestJsonFile = ManifestJsonFileCN } + // if ttsVendor == TTSVendorElevenlabs { + // manifestJsonFile = ManifestJsonFileElevenlabs + // } return } diff --git a/server/internal/worker.go b/server/internal/worker.go index e3daf80a8..14a35c8c6 100644 --- a/server/internal/worker.go +++ b/server/internal/worker.go @@ -24,7 +24,7 @@ type Worker struct { const ( workerCleanSleepSeconds = 5 - workerExec = "/app/agents/bin/worker" + workerExec = "/app/agents/bin/start" ) var ( diff --git a/server/main.go b/server/main.go 
index d190387bc..0574609dd 100644 --- a/server/main.go +++ b/server/main.go @@ -46,8 +46,10 @@ func main() { slog.Info("server config", "ttsVendorChinese", httpServerConfig.TTSVendorChinese, "ttsVendorEnglish", httpServerConfig.TTSVendorEnglish, "workersMax", httpServerConfig.WorkersMax, "workerQuitTimeoutSeconds", httpServerConfig.WorkerQuitTimeoutSeconds) - processManifest(internal.ManifestJsonFile) - processManifest(internal.ManifestJsonFileElevenlabs) + // processManifest(internal.ManifestJsonFile) + // processManifest(internal.ManifestJsonFileElevenlabs) + processManifest(internal.ManifestJsonFileEN) + processManifest(internal.ManifestJsonFileCN) httpServer := internal.NewHttpServer(httpServerConfig) httpServer.Start() } @@ -111,6 +113,11 @@ func processManifest(manifestJsonFile string) (err error) { manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="elevenlabs_tts").property.api_key`, elevenlabsTtsKey) } + cosyTtsKey := os.Getenv("COSY_TTS_KEY") + if cosyTtsKey != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="cosy_tts").property.api_key`, cosyTtsKey) + } + err = os.WriteFile(manifestJsonFile, []byte(manifestJson), 0644) return } From 091c7ad6f74ddd61d269cb1d4d7c5643253ab719 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 18:04:30 +0800 Subject: [PATCH 28/72] feat: update docker build file --- Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c4bebb0d0..1f42ac944 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM agoraio/astra_agents_build:latest AS builder +FROM agoraio/astra_agents_build:0.3.1 AS builder ARG SESSION_CONTROL_CONF=session_control.conf @@ -22,6 +22,10 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend libunwind-dev \ libc++1 \ libssl-dev \ + python3 \ + python3-venv \ + python3-pip \ + python3-dev \ ca-certificates \ && apt-get clean && rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* From 2b9aa9617b96d693479a98902f81f03032d75f49 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Sun, 14 Jul 2024 18:51:44 +0800 Subject: [PATCH 29/72] fix: add files to package.sh --- agents/scripts/package.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agents/scripts/package.sh b/agents/scripts/package.sh index 05f10a215..9269ec03f 100755 --- a/agents/scripts/package.sh +++ b/agents/scripts/package.sh @@ -35,6 +35,8 @@ cp -r bin .release cp -r lib .release cp manifest.json .release cp manifest.elevenlabs.json .release +cp manifest.cn.json .release +cp manifest.en.json .release cp property.json .release # python main and deps From e6767bc4eab3e22498222330c16eef2d0a729fde Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Sun, 14 Jul 2024 19:14:08 +0800 Subject: [PATCH 30/72] fix: add jq in builder env --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 1f42ac944..0dbb63eb3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,6 +26,7 @@ RUN apt-get clean && apt-get update && apt-get install -y --no-install-recommend python3-venv \ python3-pip \ python3-dev \ + jq \ ca-certificates \ && apt-get clean && rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* From e2b63658b982409fa11457f5e23b0ac37c5d24de Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 19:51:42 +0800 Subject: [PATCH 31/72] fix: readme updates --- README-CN.md | 5 ++--- README.md | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/README-CN.md b/README-CN.md index 
868b02d90..f05b5657c 100644 --- a/README-CN.md +++ b/README-CN.md @@ -120,7 +120,7 @@ npm i && npm run dev cp ./agents/manifest.json.example ./agents/manifest.json # pull the docker image with dev tools and mount your current folder as workspace -docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev agoraio/astra_agents_build +docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 # enter docker image docker exec -it astra_agents_dev bash @@ -140,8 +140,7 @@ export AGORA_APP_CERTIFICATE= export AZURE_STT_KEY= export AZURE_STT_REGION= export OPENAI_API_KEY= -export AZURE_TTS_KEY= -export AZURE_TTS_REGION= +export COSY_TTS_KEY= # agent is ready to start on port 8080 diff --git a/README.md b/README.md index bbffddf63..d7b265a7d 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ You need to prepare the proper `manifest.json` file first. cp ./agents/manifest.json.example ./agents/manifest.json # pull the docker image with dev tools and mount your current folder as workspace -docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev agoraio/astra_agents_build +docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 # enter docker image docker exec -it astra_agents_dev bash @@ -156,8 +156,7 @@ export AGORA_APP_CERTIFICATE= export AZURE_STT_KEY= export AZURE_STT_REGION= export OPENAI_API_KEY= -export AZURE_TTS_KEY= -export AZURE_TTS_REGION= +export COSY_TTS_KEY= # agent is ready to start on port 8080 From 844d3156d77657ed5901068be5b8f28dc4fdcb1b Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 19:54:41 +0800 Subject: [PATCH 32/72] fix: fix readme --- README-CN.md | 5 ++++- README.md | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README-CN.md b/README-CN.md index f05b5657c..04f4da67e 100644 --- a/README-CN.md +++ b/README-CN.md @@ -120,7 +120,10 @@ npm i && npm run dev cp ./agents/manifest.json.example ./agents/manifest.json # pull the docker image with dev tools and mount your current folder as workspace -docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 +docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 + +# for windows git bash +# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 # enter docker image docker exec -it astra_agents_dev bash diff --git a/README.md b/README.md index d7b265a7d..bae54ea6d 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,9 @@ cp ./agents/manifest.json.example ./agents/manifest.json # pull the docker image with dev tools and mount your current folder as workspace docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 +# for windows git bash +# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 + # enter docker image docker exec -it astra_agents_dev bash From bbbc8a196917500300e9be5e2658082568fbc382 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Sun, 14 Jul 2024 20:22:34 +0800 Subject: [PATCH 33/72] enhancement: builder image update --- Dockerfile | 2 +- README-CN.md | 13 ++++++++++--- README.md | 12 +++++++++--- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 
0dbb63eb3..80b500b1e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM agoraio/astra_agents_build:0.3.1 AS builder +FROM agoraio/astra_agents_build:0.3.2 AS builder ARG SESSION_CONTROL_CONF=session_control.conf diff --git a/README-CN.md b/README-CN.md index 04f4da67e..4f48f4fe9 100644 --- a/README-CN.md +++ b/README-CN.md @@ -117,13 +117,14 @@ npm i && npm run dev ```shell # rename manifest example -cp ./agents/manifest.json.example ./agents/manifest.json +cp ./agents/manifest.json.en.example ./agents/manifest.en.json +cp ./agents/manifest.json.cn.example ./agents/manifest.cn.json # pull the docker image with dev tools and mount your current folder as workspace -docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 +docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.2 # for windows git bash -# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 +# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.2 # enter docker image docker exec -it astra_agents_dev bash @@ -145,6 +146,12 @@ export AZURE_STT_REGION= export OPENAI_API_KEY= export COSY_TTS_KEY= + +# if you use AZURE_TTS +export AZURE_TTS_KEY= +export AZURE_TTS_REGION= + + # agent is ready to start on port 8080 make run-server diff --git a/README.md b/README.md index bae54ea6d..eef6347ce 100644 --- a/README.md +++ b/README.md @@ -133,13 +133,14 @@ You need to prepare the proper `manifest.json` file first. ```bash # rename manifest example -cp ./agents/manifest.json.example ./agents/manifest.json +cp ./agents/manifest.json.en.example ./agents/manifest.en.json +cp ./agents/manifest.json.cn.example ./agents/manifest.cn.json # pull the docker image with dev tools and mount your current folder as workspace -docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 +docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.2 # for windows git bash -# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.1 +# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.2 # enter docker image docker exec -it astra_agents_dev bash @@ -161,6 +162,11 @@ export AZURE_STT_REGION= export OPENAI_API_KEY= export COSY_TTS_KEY= + +# if you use AZURE_TTS +export AZURE_TTS_KEY= +export AZURE_TTS_REGION= + # agent is ready to start on port 8080 make run-server From 6282ccfd2ddc42bfa6c6a58832f468e0ac29ffd5 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Sun, 14 Jul 2024 13:03:14 +0000 Subject: [PATCH 34/72] fix: package --- Dockerfile | 2 +- agents/scripts/package.sh | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 80b500b1e..26cd7d79a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ WORKDIR /app COPY . . 
COPY agents/manifest.json.example agents/manifest.json -COPY agents/manifest.json.elevenlabs.example agents/manifest.elevenlabs.json +# COPY agents/manifest.json.elevenlabs.example agents/manifest.elevenlabs.json COPY agents/manifest.json.cn.example agents/manifest.cn.json COPY agents/manifest.json.en.example agents/manifest.en.json COPY agents/${SESSION_CONTROL_CONF} agents/session_control.conf diff --git a/agents/scripts/package.sh b/agents/scripts/package.sh index 9269ec03f..f41f198f7 100755 --- a/agents/scripts/package.sh +++ b/agents/scripts/package.sh @@ -23,6 +23,14 @@ copy_extension() { if [[ $EXTENSION_LANGUAGE == "python" ]]; then # TODO: package 'publish' contents only cp addon/extension/$extension/*.py .release/addon/extension/$extension/ + if [[ -f addon/extension/$extension/requirements.txt ]]; then + cp addon/extension/$extension/requirements.txt .release/addon/extension/$extension/ + fi + + # TODO: copy specific contents + if [[ -d addon/extension/$extension/pb ]]; then + cp -r addon/extension/$extension/pb .release/addon/extension/$extension/ + fi fi fi @@ -34,7 +42,7 @@ copy_extension() { cp -r bin .release cp -r lib .release cp manifest.json .release -cp manifest.elevenlabs.json .release +#cp manifest.elevenlabs.json .release cp manifest.cn.json .release cp manifest.en.json .release cp property.json .release From bafc6828fc4a93585073d8cf06d538edff05efc5 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Sun, 14 Jul 2024 21:17:58 +0800 Subject: [PATCH 35/72] fix: readme & builder pack --- Dockerfile | 2 +- README-CN.md | 1 + README.md | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 26cd7d79a..b7518c99c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM agoraio/astra_agents_build:0.3.2 AS builder +FROM ghcr.io/rte-design/astra_agents_build:0.3.2 AS builder ARG SESSION_CONTROL_CONF=session_control.conf diff --git a/README-CN.md b/README-CN.md index 4f48f4fe9..a02c0c471 100644 --- a/README-CN.md +++ b/README-CN.md @@ -117,6 +117,7 @@ npm i && npm run dev ```shell # rename manifest example +cp ./agents/manifest.json.example ./agents/manifest.json cp ./agents/manifest.json.en.example ./agents/manifest.en.json cp ./agents/manifest.json.cn.example ./agents/manifest.cn.json diff --git a/README.md b/README.md index eef6347ce..4806bd37d 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,7 @@ You need to prepare the proper `manifest.json` file first. 
```bash # rename manifest example +cp ./agents/manifest.json.example ./agents/manifest.json cp ./agents/manifest.json.en.example ./agents/manifest.en.json cp ./agents/manifest.json.cn.example ./agents/manifest.cn.json From b6eefd5c9c5ff20791b1948958db04bf1aac99e7 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Sun, 14 Jul 2024 21:22:54 +0800 Subject: [PATCH 36/72] feat: enhance cosy & qwen extensions --- agents/addon/extension/cosy_tts/main.py | 146 +++++++++++++----- .../interrupt_detector_extension.py | 8 +- .../interrupt_detector_python/manifest.json | 13 ++ agents/addon/extension/qwen_llm_python/log.py | 13 ++ .../addon/extension/qwen_llm_python/main.py | 97 +++++++----- 5 files changed, 198 insertions(+), 79 deletions(-) create mode 100644 agents/addon/extension/qwen_llm_python/log.py diff --git a/agents/addon/extension/cosy_tts/main.py b/agents/addon/extension/cosy_tts/main.py index ca66e26ae..3208be098 100644 --- a/agents/addon/extension/cosy_tts/main.py +++ b/agents/addon/extension/cosy_tts/main.py @@ -21,6 +21,9 @@ ) from typing import List, Any import dashscope +import queue +import threading +from datetime import datetime from dashscope.audio.tts_v2 import ResultCallback, SpeechSynthesizer, AudioFormat from .log import logger @@ -33,21 +36,22 @@ def __init__(self, rte: Rte, sample_rate: int): self.rte = rte self.sample_rate = sample_rate self.frame_size = int(self.sample_rate * 1 * 2 / 100) + self.canceled = False def on_open(self): - print("websocket is open.") + logger.info("websocket is open.") def on_complete(self): - print("speech synthesis task complete successfully.") + logger.info("speech synthesis task complete successfully.") def on_error(self, message: str): - print(f"speech synthesis task failed, {message}") + logger.info(f"speech synthesis task failed, {message}") def on_close(self): - print("websocket is closed.") + logger.info("websocket is closed.") def on_event(self, message): - print(f"recv speech synthsis message {message}") + logger.info(f"recv speech synthsis message {message}") def get_frame(self, data: bytes) -> PcmFrame: f = PcmFrame.create("pcm_frame") @@ -62,27 +66,33 @@ def get_frame(self, data: bytes) -> PcmFrame: buff[:] = data f.unlock_buf(buff) return f + + def cancel(self) -> None: + self.canceled = True def on_data(self, data: bytes) -> None: - print("audio result length:", len(data), self.frame_size) + if self.canceled: + return + + logger.info("audio result length: %d, %d", len(data), self.frame_size) try: - chunk = int(len(data) / self.frame_size) - offset = 0 - for i in range(0, chunk): - #print("****", i, offset, self.frame_size) - f = self.get_frame(data[offset:offset + self.frame_size]) - self.rte.send_pcm_frame(f) - #print("send pcm chunk", i) - offset += self.frame_size + chunk = int(len(data) / self.frame_size) + offset = 0 + for i in range(0, chunk): + if self.canceled: + return + f = self.get_frame(data[offset:offset + self.frame_size]) + self.rte.send_pcm_frame(f) + offset += self.frame_size - if offset < len(data): - #print("-----") - size = len(data) - offset - f = self.get_frame(data[offset:offset+size]) - self.rte.send_pcm_frame(f) - #print("send last pcm chunk") + if self.canceled: + return + if offset < len(data): + size = len(data) - offset + f = self.get_frame(data[offset:offset+size]) + self.rte.send_pcm_frame(f) except Exception as e: - print("exception:", e) + logger.exception(e) class CosyTTSExtension(Extension): def __init__(self, name: str): @@ -93,19 +103,21 @@ def __init__(self, name: str): self.sample_rate = 16000 
self.tts = None self.callback = None + self.format = None - def on_msg(self, msg: str): - print("on message", msg) - self.tts.streaming_call(msg) + self.stopped = False + self.thread = None + self.queue = queue.Queue() + self.mutex = threading.Lock() def on_init( self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo ) -> None: - print("CosyTTSExtension on_init") + logger.info("CosyTTSExtension on_init") rte.on_init_done(manifest, property) def on_start(self, rte: Rte) -> None: - print("CosyTTSExtension on_start") + logger.info("CosyTTSExtension on_start") self.api_key = rte.get_property_string("api_key") self.voice = rte.get_property_string("voice") self.model = rte.get_property_string("model") @@ -126,36 +138,92 @@ def on_start(self, rte: Rte) -> None: elif self.sample_rate == 48000: f = AudioFormat.PCM_48000HZ_MONO_16BIT else: - print("unknown sample rate", self.sample_rate) + logger.info("unknown sample rate %d", self.sample_rate) exit() - self.callback = CosyTTSCallback(rte, self.sample_rate) - self.tts = SpeechSynthesizer(model=self.model, voice=self.voice, format=f, callback=self.callback) + self.format = f + + self.thread = threading.Thread(target=self.async_handle, args=[rte]) + self.thread.start() rte.on_start_done() def on_stop(self, rte: Rte) -> None: - print("CosyTTSExtension on_stop") + logger.info("CosyTTSExtension on_stop") + + self.stopped = True self.tts.streaming_complete() + self.flush() + self.thread.join() rte.on_stop_done() def on_deinit(self, rte: Rte) -> None: - print("CosyTTSExtension on_deinit") + logger.info("CosyTTSExtension on_deinit") rte.on_deinit_done() + def need_interrupt(self, ts: datetime.time) -> bool: + return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1 + + def async_handle(self, rte: Rte): + tts = None + callback = None + while not self.stopped: + try: + inputText, is_end, ts = self.queue.get() + if len(inputText) == 0: + if tts is not None: + tts.streaming_cancel() + if callback is not None: + callback.cancel() + tts = None + callback = None + continue + + if tts is None: + callback = CosyTTSCallback(rte, self.sample_rate) + tts = SpeechSynthesizer(model=self.model, voice=self.voice, format=self.format, callback=callback) + + logger.info("on message %s", inputText) + tts.streaming_call(inputText) + if is_end: + tts.streaming_complete() + tts = None + except Exception as e: + logger.exception(e) + finally: + tts = None + callback = None + + def flush(self): + logger.info("CosyTTSExtension flush") + while not self.queue.empty(): + self.queue.get() + self.queue.put(("", True, datetime.now())) + def on_data(self, rte: Rte, data: Data) -> None: - print("CosyTTSExtension on_data") + logger.info("CosyTTSExtension on_data") inputText = data.get_property_string("text") if len(inputText) == 0: - print("ignore empty text") + logger.info("ignore empty text") return - print("on data", inputText) - self.on_msg(inputText) + is_end = data.get_property_bool("end_of_segment") + + logger.info("on data %s %d", inputText, is_end) + self.queue.put((inputText, is_end, datetime.now())) def on_cmd(self, rte: Rte, cmd: Cmd) -> None: - print("CosyTTSExtension on_cmd") + logger.info("CosyTTSExtension on_cmd") cmd_json = cmd.to_json() - print("CosyTTSExtension on_cmd json: " + cmd_json) + logger.info("CosyTTSExtension on_cmd json: %s" + cmd_json) + + cmdName = cmd.get_name() + if cmdName == "flush": + self.outdateTs = datetime.now() + self.flush() + cmd_out = Cmd.create("flush") + rte.send_cmd(cmd_out, lambda rte, result: print("DefaultExtension 
send_cmd done")) + else: + logger.info("unknown cmd %s", cmdName) cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") @@ -164,7 +232,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: @register_addon_as_extension("cosy_tts") class CosyTTSExtensionAddon(Addon): def on_init(self, rte: Rte, manifest, property) -> None: - print("CosyTTSExtensionAddon on_init") + logger.info("CosyTTSExtensionAddon on_init") rte.on_init_done(manifest, property) return @@ -173,6 +241,6 @@ def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: rte.on_create_instance_done(CosyTTSExtension(addon_name), context) def on_deinit(self, rte: Rte) -> None: - print("CosyTTSExtensionAddon on_deinit") + logger.info("CosyTTSExtensionAddon on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py index d1e388a95..a970ce563 100644 --- a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py @@ -84,10 +84,14 @@ def on_data(self, rte: Rte, data: Data) -> None: if final or len(text) >= 2: flush_cmd = Cmd.create(CMD_NAME_FLUSH) - rte.send_cmd(flush_cmd, lambda rte, result: print("DefaultExtension send_cmd done")) + rte.send_cmd(flush_cmd, lambda rte, result: print("InterruptDetectorExtensionAddon send_cmd done")) logger.info(f"sent cmd: {CMD_NAME_FLUSH}") - + + d = Data.create("text_data") + d.set_property_bool(TEXT_DATA_FINAL_FIELD, final) + d.set_property_string(TEXT_DATA_TEXT_FIELD, text) + rte.send_data(d) @register_addon_as_extension("interrupt_detector_python") class InterruptDetectorExtensionAddon(Addon): diff --git a/agents/addon/extension/interrupt_detector_python/manifest.json b/agents/addon/extension/interrupt_detector_python/manifest.json index 9b1b2461c..cc261c7c4 100644 --- a/agents/addon/extension/interrupt_detector_python/manifest.json +++ b/agents/addon/extension/interrupt_detector_python/manifest.json @@ -28,6 +28,19 @@ { "name": "flush" } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + } + } + } ] } } \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/log.py b/agents/addon/extension/qwen_llm_python/log.py new file mode 100644 index 000000000..98c232283 --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("qwen_llm_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler) diff --git a/agents/addon/extension/qwen_llm_python/main.py b/agents/addon/extension/qwen_llm_python/main.py index dad5da1ba..7f5977203 100644 --- a/agents/addon/extension/qwen_llm_python/main.py +++ b/agents/addon/extension/qwen_llm_python/main.py @@ -24,6 +24,7 @@ from datetime import datetime import threading from http import HTTPStatus +from .log import logger def isEnd(content: str) -> bool: last = content[len(content)-1] @@ -43,6 +44,7 @@ def __init__(self, name: str): self.stopped = False self.thread = None self.outdateTs = datetime.now() + self.ongoing = "" self.queue = queue.Queue() 
self.mutex = threading.Lock() @@ -67,9 +69,12 @@ def get_messages(self) -> List[Any]: finally: self.mutex.release() return messages - + + def need_interrupt(self, ts: datetime.time) -> bool: + return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1 + def call(self, messages: List[Any]): - print("before call", messages) + logger.info("before call %s", messages) response = dashscope.Generation.call("qwen-max", messages=messages, result_format='message', # set the result to be "message" format. @@ -78,14 +83,19 @@ def call(self, messages: List[Any]): ) if response.status_code == HTTPStatus.OK: self.on_msg(response.output.choices[0]['message']['role'], response.output.choices[0]['message']['content']) - print("on response", response.output.choices[0]['message']['content']) + logger.info("on response %s", response.output.choices[0]['message']['content']) else: - print("Failed to get response", response) + logger.info("Failed to get response %s", response) - def call_with_stream(self, rte: Rte, ts :datetime.time, messages: List[Any]): - print("before call", messages) - if self.outdateTs > ts: + def call_with_stream(self, rte: Rte, ts :datetime.time, inputText: str, messages: List[Any]): + if self.need_interrupt(ts): + logger.warning("out of date, %s, %s", self.outdateTs, ts) return + if len(self.ongoing) > 0: + messages.append({'role':'assistant', 'content':self.ongoing}) + messages.append({'role':'user', 'content':inputText}) + logger.info("before call %s %s", messages, ts) + responses = dashscope.Generation.call("qwen-max", messages=messages, result_format='message', # set the result to be "message" format. @@ -95,12 +105,17 @@ def call_with_stream(self, rte: Rte, ts :datetime.time, messages: List[Any]): total = "" partial = "" for response in responses: - if self.outdateTs > ts: - return + if self.need_interrupt(ts): + if len(self.ongoing) > 0: + self.on_msg('assistant', self.ongoing) + self.ongoing = '' + logger.warning("out of date, %s, %s", self.outdateTs, ts) + return if response.status_code == HTTPStatus.OK: temp = response.output.choices[0]['message']['content'] partial += temp - if isEnd(temp): + self.ongoing += temp + if isEnd(temp) or len(partial) > 10: d = Data.create("text_data") d.set_property_bool("end_of_segment", isEnd(partial)) d.set_property_string("text", partial) @@ -108,7 +123,7 @@ def call_with_stream(self, rte: Rte, ts :datetime.time, messages: List[Any]): total += partial partial = "" else: - print('Request id: %s, Status code: %s, error code: %s, error message: %s' % ( + logger.info('Request id: %s, Status code: %s, error code: %s, error message: %s' % ( response.request_id, response.status_code, response.code, response.message )) @@ -120,17 +135,19 @@ def call_with_stream(self, rte: Rte, ts :datetime.time, messages: List[Any]): rte.send_data(d) total += partial partial = "" + self.ongoing = "" + self.on_msg("user", inputText) self.on_msg("assistant", total) - print("on response", total) + logger.info("on response %s", total) def on_init( self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo ) -> None: - print("QWenLLMExtension on_init") + logger.info("QWenLLMExtension on_init") rte.on_init_done(manifest, property) def on_start(self, rte: Rte) -> None: - print("QWenLLMExtension on_start") + logger.info("QWenLLMExtension on_start") self.api_key = rte.get_property_string("api_key") self.mode = rte.get_property_string("model") self.prompt = rte.get_property_string("prompt") @@ -142,78 +159,82 @@ def on_start(self, rte: Rte) -> None: 
rte.on_start_done() def on_stop(self, rte: Rte) -> None: - print("QWenLLMExtension on_stop") + logger.info("QWenLLMExtension on_stop") self.stopped = True self.flush() self.thread.join() rte.on_stop_done() def on_deinit(self, rte: Rte) -> None: - print("QWenLLMExtension on_deinit") + logger.info("QWenLLMExtension on_deinit") rte.on_deinit_done() def flush(self): - print("QWenLLMExtension flush") + logger.info("QWenLLMExtension flush") while not self.queue.empty(): self.queue.get() def on_data(self, rte: Rte, data: Data) -> None: - print("QWenLLMExtension on_data") + logger.info("QWenLLMExtension on_data") is_final = data.get_property_bool("is_final") if not is_final: - print("ignore non final") + logger.info("ignore non final") return inputText = data.get_property_string("text") if len(inputText) == 0: - print("ignore empty text") + logger.info("ignore empty text") return ts = datetime.now() - print("on data ", inputText, ts) + logger.info("on data %s, %s", inputText, ts) self.queue.put((inputText, ts)) def async_handle(self, rte: Rte): while not self.stopped: - inputText, ts = self.queue.get() - if self.outdateTs > ts: - continue - print("fetch from queue", inputText) - self.on_msg("user", inputText) - messages = self.get_messages() - self.call_with_stream(rte, ts, messages) + try: + inputText, ts = self.queue.get() + if self.need_interrupt(ts): + continue + logger.info("fetch from queue %s", inputText) + history = self.get_messages() + self.call_with_stream(rte, ts, inputText, history) + except Exception as e: + logger.exception(e) def on_cmd(self, rte: Rte, cmd: Cmd) -> None: - print("QWenLLMExtension on_cmd") + logger.info("QWenLLMExtension on_cmd") cmd_json = cmd.to_json() - print("QWenLLMExtension on_cmd json: " + cmd_json) + logger.info("QWenLLMExtension on_cmd json: %s", cmd_json) cmdName = cmd.get_name() if cmdName == "flush": self.outdateTs = datetime.now() - self.flush() + #self.flush() + cmd_out = Cmd.create("flush") + rte.send_cmd(cmd_out, lambda rte, result: print("QWenLLMExtensionAddon send_cmd done")) else: - print("unknown cmd", cmdName) + logger.info("unknown cmd %s", cmdName) cmd_result = CmdResult.create(StatusCode.OK) rte.return_result(cmd_result, cmd) def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: - print("QWenLLMExtension on_cmd") + logger.info("QWenLLMExtension on_cmd") @register_addon_as_extension("qwen_llm_python") class QWenLLMExtensionAddon(Addon): def on_init(self, rte: Rte, manifest, property) -> None: - print("QWenLLMExtensionAddon on_init") + logger.info("QWenLLMExtensionAddon on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str) -> Extension: - print("QWenLLMExtensionAddon on_create_instance") - return QWenLLMExtension(addon_name) + def on_create_instance(self, rte: Rte, addon_name: str, context) -> Extension: + logger.info("on_create_instance") + rte.on_create_instance_done(QWenLLMExtension(addon_name), context) def on_deinit(self, rte: Rte) -> None: - print("QWenLLMExtensionAddon on_deinit") + logger.info("QWenLLMExtensionAddon on_deinit") rte.on_deinit_done() return From be5e01c46d6631007e757d7dd32f0d6690a6e1a3 Mon Sep 17 00:00:00 2001 From: tomasliu <67892682+tomasliu-agora@users.noreply.github.com> Date: Mon, 15 Jul 2024 00:28:38 +0800 Subject: [PATCH 37/72] fix cosy interrupt and safe exit (#100) --- agents/addon/extension/cosy_tts/main.py | 79 ++++--- .../addon/extension/qwen_llm_python/main.py | 11 +- agents/manifest.json.qwen.example | 205 ++++++++++++++++++ 3 files 
changed, 258 insertions(+), 37 deletions(-) create mode 100644 agents/manifest.json.qwen.example diff --git a/agents/addon/extension/cosy_tts/main.py b/agents/addon/extension/cosy_tts/main.py index 3208be098..e9661f723 100644 --- a/agents/addon/extension/cosy_tts/main.py +++ b/agents/addon/extension/cosy_tts/main.py @@ -51,7 +51,8 @@ def on_close(self): logger.info("websocket is closed.") def on_event(self, message): - logger.info(f"recv speech synthsis message {message}") + pass + #logger.info(f"recv speech synthsis message {message}") def get_frame(self, data: bytes) -> PcmFrame: f = PcmFrame.create("pcm_frame") @@ -61,9 +62,11 @@ def get_frame(self, data: bytes) -> PcmFrame: # f.set_timestamp = 0 f.set_data_fmt(RTE_PCM_FRAME_DATA_FMT.RTE_PCM_FRAME_DATA_FMT_INTERLEAVE) f.set_samples_per_channel(self.sample_rate // 100) - f.alloc_buf(len(data)) + f.alloc_buf(self.frame_size) buff = f.lock_buf() - buff[:] = data + if len(data) < self.frame_size: + buff[:] = bytes(self.frame_size) #fill with 0 + buff[:len(data)] = data f.unlock_buf(buff) return f @@ -74,7 +77,7 @@ def on_data(self, data: bytes) -> None: if self.canceled: return - logger.info("audio result length: %d, %d", len(data), self.frame_size) + #logger.info("audio result length: %d, %d", len(data), self.frame_size) try: chunk = int(len(data) / self.frame_size) offset = 0 @@ -151,7 +154,7 @@ def on_stop(self, rte: Rte) -> None: logger.info("CosyTTSExtension on_stop") self.stopped = True - self.tts.streaming_complete() + self.queue.put(None) self.flush() self.thread.join() rte.on_stop_done() @@ -164,40 +167,46 @@ def need_interrupt(self, ts: datetime.time) -> bool: return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1 def async_handle(self, rte: Rte): - tts = None - callback = None - while not self.stopped: - try: - inputText, is_end, ts = self.queue.get() - if len(inputText) == 0: - if tts is not None: - tts.streaming_cancel() - if callback is not None: - callback.cancel() - tts = None - callback = None - continue - - if tts is None: - callback = CosyTTSCallback(rte, self.sample_rate) - tts = SpeechSynthesizer(model=self.model, voice=self.voice, format=self.format, callback=callback) - - logger.info("on message %s", inputText) - tts.streaming_call(inputText) - if is_end: - tts.streaming_complete() - tts = None - except Exception as e: - logger.exception(e) - finally: - tts = None - callback = None + try: + tts = None + callback = None + while not self.stopped: + try: + value = self.queue.get() + if value is None: + break + inputText, ts = value + if len(inputText) == 0: + logger.warning("empty input for interrupt") + if tts is not None: + tts.streaming_cancel() + if callback is not None: + callback.cancel() + tts = None + callback = None + continue + + if self.need_interrupt(ts): + continue + + if tts is None: + logger.info("creating tts") + callback = CosyTTSCallback(rte, self.sample_rate) + tts = SpeechSynthesizer(model=self.model, voice=self.voice, format=self.format, callback=callback) + + logger.info("on message %s", inputText) + tts.streaming_call(inputText) + except Exception as e: + logger.exception(e) + finally: + if tts is not None: + tts.streaming_complete() def flush(self): logger.info("CosyTTSExtension flush") while not self.queue.empty(): self.queue.get() - self.queue.put(("", True, datetime.now())) + self.queue.put(("", datetime.now())) def on_data(self, rte: Rte, data: Data) -> None: logger.info("CosyTTSExtension on_data") @@ -209,7 +218,7 @@ def on_data(self, rte: Rte, data: Data) -> None: is_end = 
data.get_property_bool("end_of_segment") logger.info("on data %s %d", inputText, is_end) - self.queue.put((inputText, is_end, datetime.now())) + self.queue.put((inputText, datetime.now())) def on_cmd(self, rte: Rte, cmd: Cmd) -> None: logger.info("CosyTTSExtension on_cmd") diff --git a/agents/addon/extension/qwen_llm_python/main.py b/agents/addon/extension/qwen_llm_python/main.py index 7f5977203..41a0cab0b 100644 --- a/agents/addon/extension/qwen_llm_python/main.py +++ b/agents/addon/extension/qwen_llm_python/main.py @@ -107,15 +107,18 @@ def call_with_stream(self, rte: Rte, ts :datetime.time, inputText: str, messages for response in responses: if self.need_interrupt(ts): if len(self.ongoing) > 0: + self.on_msg('user', inputText) self.on_msg('assistant', self.ongoing) self.ongoing = '' logger.warning("out of date, %s, %s", self.outdateTs, ts) return if response.status_code == HTTPStatus.OK: temp = response.output.choices[0]['message']['content'] + if len(temp) == 0: + continue partial += temp self.ongoing += temp - if isEnd(temp) or len(partial) > 10: + if (isEnd(temp) and len(partial) > 10) or len(partial) > 50: d = Data.create("text_data") d.set_property_bool("end_of_segment", isEnd(partial)) d.set_property_string("text", partial) @@ -161,6 +164,7 @@ def on_start(self, rte: Rte) -> None: def on_stop(self, rte: Rte) -> None: logger.info("QWenLLMExtension on_stop") self.stopped = True + self.queue.put(None) self.flush() self.thread.join() rte.on_stop_done() @@ -194,7 +198,10 @@ def on_data(self, rte: Rte, data: Data) -> None: def async_handle(self, rte: Rte): while not self.stopped: try: - inputText, ts = self.queue.get() + value = self.queue.get() + if value is None: + break + inputText, ts = value if self.need_interrupt(ts): continue logger.info("fetch from queue %s", inputText) diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.qwen.example new file mode 100644 index 000000000..3361faa38 --- /dev/null +++ b/agents/manifest.json.qwen.example @@ -0,0 +1,205 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "python", + "dependencies": [ + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.2.0" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.2.0-alpha" + }, + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "llm", + "addon": "qwen_llm_python", + "name": "qwen_llm", + "property": { + "api_key": "", + "model": "qwen-max", + "max_tokens": 512, + "prompt": "", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cosy_tts", + "name": "cosy_tts", + "property": { + "api_key": "", + "model": "cosyvoice-v1", + "voice": "longxiaochun", + "sample_rate": 16000 + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": 
"interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "llm" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ], + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "llm", + "extension": "qwen_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cosy_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + } + ] + } + ] +} From 991745baca0f1fd8d19a5967e478da701d51ae34 Mon Sep 17 00:00:00 2001 From: Zhang Qianze Date: Mon, 15 Jul 2024 01:21:34 +0800 Subject: [PATCH 38/72] feat: support qwen as cn default in gateway server. 
readme update --- README-CN.md | 8 +- README.md | 8 +- agents/manifest.json.cn.example | 73 ++++++++++--------- ...xample => manifest.json.cn.openai.example} | 73 +++++++++---------- server/main.go | 5 ++ 5 files changed, 90 insertions(+), 77 deletions(-) rename agents/{manifest.json.qwen.example => manifest.json.cn.openai.example} (84%) diff --git a/README-CN.md b/README-CN.md index a02c0c471..12ef31744 100644 --- a/README-CN.md +++ b/README-CN.md @@ -144,10 +144,14 @@ export AGORA_APP_ID= export AGORA_APP_CERTIFICATE= export AZURE_STT_KEY= export AZURE_STT_REGION= -export OPENAI_API_KEY= -export COSY_TTS_KEY= +# LLM +export OPENAI_API_KEY= +export QWEN_API_KEY= +# TTS +# cosy +export COSY_TTS_KEY= # if you use AZURE_TTS export AZURE_TTS_KEY= export AZURE_TTS_REGION= diff --git a/README.md b/README.md index 4806bd37d..34ec7bbd6 100644 --- a/README.md +++ b/README.md @@ -160,10 +160,14 @@ export AGORA_APP_ID= export AGORA_APP_CERTIFICATE= export AZURE_STT_KEY= export AZURE_STT_REGION= -export OPENAI_API_KEY= -export COSY_TTS_KEY= +# LLM +export OPENAI_API_KEY= +export QWEN_API_KEY= +# TTS +# cosy +export COSY_TTS_KEY= # if you use AZURE_TTS export AZURE_TTS_KEY= export AZURE_TTS_REGION= diff --git a/agents/manifest.json.cn.example b/agents/manifest.json.cn.example index 8a8f6b1e6..3361faa38 100644 --- a/agents/manifest.json.cn.example +++ b/agents/manifest.json.cn.example @@ -12,7 +12,7 @@ { "type": "extension", "name": "agora_rtc", - "version": "0.2.1" + "version": "0.2.0-alpha" }, { "type": "system", @@ -49,18 +49,14 @@ }, { "type": "extension", - "extension_group": "chatgpt", - "addon": "openai_chatgpt_python", - "name": "openai_chatgpt", + "extension_group": "llm", + "addon": "qwen_llm_python", + "name": "qwen_llm", "property": { - "base_url": "", - "api_key": "", - "frequency_penalty": 0.9, - "model": "gpt-3.5-turbo", + "api_key": "", + "model": "qwen-max", "max_tokens": 512, "prompt": "", - "proxy_url": "", - "greeting": "ASTRA agent connected. 
How can i help you today?", "max_memory_length": 10 } }, @@ -70,7 +66,7 @@ "addon": "cosy_tts", "name": "cosy_tts", "property": { - "api_key": "", + "api_key": "", "model": "cosyvoice-v1", "voice": "longxiaochun", "sample_rate": 16000 @@ -90,7 +86,7 @@ { "type": "extension_group", "addon": "default_extension_group", - "name": "chatgpt" + "name": "llm" }, { "type": "extension_group", @@ -109,10 +105,6 @@ { "extension_group": "default", "extension": "interrupt_detector" - }, - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" } ] } @@ -122,16 +114,42 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ], + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" } ] } ] }, { - "extension_group": "chatgpt", - "extension": "openai_chatgpt", + "extension_group": "llm", + "extension": "qwen_llm", "data": [ { "name": "text_data", @@ -180,21 +198,6 @@ ] } ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" - } - ] - } - ] } ] } diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.cn.openai.example similarity index 84% rename from agents/manifest.json.qwen.example rename to agents/manifest.json.cn.openai.example index 3361faa38..8a8f6b1e6 100644 --- a/agents/manifest.json.qwen.example +++ b/agents/manifest.json.cn.openai.example @@ -12,7 +12,7 @@ { "type": "extension", "name": "agora_rtc", - "version": "0.2.0-alpha" + "version": "0.2.1" }, { "type": "system", @@ -49,14 +49,18 @@ }, { "type": "extension", - "extension_group": "llm", - "addon": "qwen_llm_python", - "name": "qwen_llm", + "extension_group": "chatgpt", + "addon": "openai_chatgpt_python", + "name": "openai_chatgpt", "property": { - "api_key": "", - "model": "qwen-max", + "base_url": "", + "api_key": "", + "frequency_penalty": 0.9, + "model": "gpt-3.5-turbo", "max_tokens": 512, "prompt": "", + "proxy_url": "", + "greeting": "ASTRA agent connected. 
How can i help you today?", "max_memory_length": 10 } }, @@ -66,7 +70,7 @@ "addon": "cosy_tts", "name": "cosy_tts", "property": { - "api_key": "", + "api_key": "", "model": "cosyvoice-v1", "voice": "longxiaochun", "sample_rate": 16000 @@ -86,7 +90,7 @@ { "type": "extension_group", "addon": "default_extension_group", - "name": "llm" + "name": "chatgpt" }, { "type": "extension_group", @@ -105,6 +109,10 @@ { "extension_group": "default", "extension": "interrupt_detector" + }, + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" } ] } @@ -114,42 +122,16 @@ "name": "pcm_frame", "dest": [ { - "extension_group": "llm", - "extension": "qwen_llm" - } - ] - } - ] - }, - { - "extension_group": "default", - "extension": "interrupt_detector", - "cmd": [ - { - "name": "flush", - "dest": [ - { - "extension_group": "llm", - "extension": "qwen_llm" - } - ] - } - ], - "data": [ - { - "name": "text_data", - "dest": [ - { - "extension_group": "llm", - "extension": "qwen_llm" + "extension_group": "chatgpt", + "extension": "openai_chatgpt" } ] } ] }, { - "extension_group": "llm", - "extension": "qwen_llm", + "extension_group": "chatgpt", + "extension": "openai_chatgpt", "data": [ { "name": "text_data", @@ -198,6 +180,21 @@ ] } ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "chatgpt", + "extension": "openai_chatgpt" + } + ] + } + ] } ] } diff --git a/server/main.go b/server/main.go index 0574609dd..20d1fff27 100644 --- a/server/main.go +++ b/server/main.go @@ -118,6 +118,11 @@ func processManifest(manifestJsonFile string) (err error) { manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="cosy_tts").property.api_key`, cosyTtsKey) } + qwenApiKey := os.Getenv("QWEN_API_KEY") + if qwenApiKey != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="qwen_llm").property.api_key`, qwenApiKey) + } + err = os.WriteFile(manifestJsonFile, []byte(manifestJson), 0644) return } From f620884f0343a761a266e3a18be3f80f59eb7507 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Tue, 16 Jul 2024 11:42:41 +0800 Subject: [PATCH 39/72] update cn default exmaple --- agents/manifest.json.cn.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/manifest.json.cn.example b/agents/manifest.json.cn.example index 3361faa38..0040a8142 100644 --- a/agents/manifest.json.cn.example +++ b/agents/manifest.json.cn.example @@ -41,7 +41,7 @@ "publish_data": true, "enable_agora_asr": true, "agora_asr_vendor_name": "microsoft", - "agora_asr_language": "en-US", + "agora_asr_language": "zh-CN", "agora_asr_vendor_key": "", "agora_asr_vendor_region": "", "agora_asr_session_control_file_path": "session_control.conf" From 10c28f3d475dbd49837d92fef1a939fc26381fed Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Tue, 16 Jul 2024 12:36:35 +0800 Subject: [PATCH 40/72] update readme --- README.md | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 34ec7bbd6..fde47dfe7 100644 --- a/README.md +++ b/README.md @@ -161,14 +161,15 @@ export AGORA_APP_CERTIFICATE= export AZURE_STT_KEY= export AZURE_STT_REGION= -# LLM +# openai export OPENAI_API_KEY= +# qwen export QWEN_API_KEY= # TTS # cosy export COSY_TTS_KEY= -# if you use AZURE_TTS +# azure export AZURE_TTS_KEY= export AZURE_TTS_REGION= @@ -179,6 +180,36 @@ make run-server 🎉 Congratulations! You have created your first personalized voice agent. +

Quick Agent Customization Test

+The default agent control is managed via server gateway. For quick testing, you can also run the agent directly. + +``` + +# rename manifest example +cp ./agents/manifest.json.example ./agents/manifest.json +cp ./agents/manifest.json.en.example ./agents/manifest.en.json +cp ./agents/manifest.json.cn.example ./agents/manifest.cn.json + +# pull the docker image with dev tools and mount your current folder as workspace +docker run -itd -v $(pwd):/app -w /app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.2 + +# for windows git bash +# docker run -itd -v //$(pwd):/app -w //app -p 8080:8080 --name astra_agents_dev ghcr.io/rte-design/astra_agents_build:0.3.2 + +# enter docker image +docker exec -it astra_agents_dev bash + +make build + +cd ./agents +# manipulate values in manifest.json to replace , , , with your keys +./bin/start +``` + +use `https://webdemo.agora.io/` to quickly test. + +Note the `channel` and `remote_stream_id` needs to match with the one you give on `https://webdemo.agora.io/` +

ASTRA Service

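A note on the concurrency pattern introduced in patches 36 and 37 above: `cosy_tts` and `qwen_llm` now share the same shape — a worker thread draining a `queue.Queue` of `(payload, timestamp)` tuples, a `flush` command that records `outdateTs = datetime.now()` so stale items can be skipped via `need_interrupt()`, and a `None` sentinel pushed on stop so the blocking `get()` unwinds cleanly. The sketch below distills that pattern in isolation; it is a minimal illustration with invented names (`InterruptibleWorker`, `submit`), not the extensions' actual code.

```python
import queue
import threading
from datetime import datetime


class InterruptibleWorker:
    """Minimal sketch of the flush/interrupt pattern from the cosy_tts and qwen_llm patches.

    Items carry their enqueue timestamp; a flush marks the current time as
    outdate_ts, so the worker can drop anything queued before the interruption.
    A None sentinel unblocks the thread on stop.
    """

    def __init__(self):
        self.outdate_ts = datetime.now()
        self.queue = queue.Queue()
        self.stopped = False
        self.thread = threading.Thread(target=self._loop, daemon=True)
        self.thread.start()

    def need_interrupt(self, ts: datetime) -> bool:
        # Same guard as the patches: only drop items noticeably older than the flush.
        return self.outdate_ts > ts and (self.outdate_ts - ts).total_seconds() > 1

    def submit(self, payload: str):
        self.queue.put((payload, datetime.now()))

    def flush(self):
        self.outdate_ts = datetime.now()
        while not self.queue.empty():
            self.queue.get()

    def stop(self):
        self.stopped = True
        self.queue.put(None)  # sentinel so the blocking get() returns
        self.thread.join()

    def _loop(self):
        while not self.stopped:
            value = self.queue.get()
            if value is None:
                break
            payload, ts = value
            if self.need_interrupt(ts):
                continue  # stale item from before the last flush
            print("processing", payload)


if __name__ == "__main__":
    w = InterruptibleWorker()
    w.submit("hello")
    w.flush()          # drops anything still queued
    w.submit("world")  # enqueued after the flush, processed normally
    w.stop()
```

In the actual patches the flush is also propagated downstream with `rte.send_cmd(Cmd.create("flush"), ...)`, so the TTS and RTC stages can discard their own in-flight output as well.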
From eb70f7842548e0181e17b4a67a657fa97976d388 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Tue, 16 Jul 2024 12:38:06 +0800 Subject: [PATCH 41/72] fix --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fde47dfe7..feb828974 100644 --- a/README.md +++ b/README.md @@ -206,9 +206,9 @@ cd ./agents ./bin/start ``` -use `https://webdemo.agora.io/` to quickly test. +use [https://webdemo.agora.io/](https://webdemo.agora.io/) to quickly test. -Note the `channel` and `remote_stream_id` needs to match with the one you give on `https://webdemo.agora.io/` +Note the `agora_appid` `channel` and `remote_stream_id` needs to match with the one you use on `https://webdemo.agora.io/`

ASTRA Service

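Another detail from patch 37 worth highlighting is `CosyTTSCallback.get_frame`: frames are now always allocated at the full `frame_size`, and a short final chunk is zero-padded rather than sent as an undersized buffer. Below is a dependency-free sketch of that framing math, assuming 16-bit mono PCM and 10 ms frames; `split_pcm` is an invented helper name for this note, not part of the RTE API.

```python
from typing import List


def split_pcm(data: bytes, sample_rate: int) -> List[bytes]:
    """Split a PCM byte buffer into fixed-size 10 ms frames, zero-padding the tail.

    Mirrors the framing math in the cosy_tts patches:
    frame_size = sample_rate * channels(1) * bytes_per_sample(2) / 100.
    """
    frame_size = int(sample_rate * 1 * 2 / 100)
    frames = []
    for offset in range(0, len(data), frame_size):
        chunk = data[offset:offset + frame_size]
        if len(chunk) < frame_size:
            chunk = chunk + bytes(frame_size - len(chunk))  # fill with 0, as in the patch
        frames.append(chunk)
    return frames


if __name__ == "__main__":
    frames = split_pcm(b"\x01" * 1000, sample_rate=16000)
    # 16 kHz mono 16-bit -> 320-byte frames; 1000 bytes -> 3 full + 1 padded frame
    assert len(frames) == 4 and all(len(f) == 320 for f in frames)
```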
From 0956adac4f2894fb60a651e84ae002a8665541ae Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Tue, 16 Jul 2024 12:46:49 +0800 Subject: [PATCH 42/72] fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index feb828974..3bba59392 100644 --- a/README.md +++ b/README.md @@ -208,7 +208,7 @@ cd ./agents use [https://webdemo.agora.io/](https://webdemo.agora.io/) to quickly test. -Note the `agora_appid` `channel` and `remote_stream_id` needs to match with the one you use on `https://webdemo.agora.io/` +Note the `channel` and `remote_stream_id` needs to match with the one you use on `https://webdemo.agora.io/`

ASTRA Service

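For readers following the `qwen_llm` changes: since patch 37 the extension batches streamed LLM tokens into sentence-sized segments before emitting `text_data`, cutting when a chunk ends with sentence punctuation and the segment already exceeds 10 characters, or unconditionally once it grows past 50 characters. A self-contained sketch of that rule follows; `segment_stream` and the exact punctuation set are illustrative stand-ins, since the full `isEnd()` list is not shown in the hunks above.

```python
from typing import Iterable, Iterator

# Illustrative cut points, in the spirit of qwen_llm's isEnd():
# common ASCII and CJK end punctuation.
END_PUNCTUATION = set(",，.。?？!！")


def is_end(content: str) -> bool:
    return len(content) > 0 and content[-1] in END_PUNCTUATION


def segment_stream(tokens: Iterable[str]) -> Iterator[str]:
    """Group streamed LLM chunks into TTS-sized segments.

    Cut when a chunk ends with punctuation and the segment is already longer
    than 10 chars, or unconditionally past 50 chars -- the thresholds patch 37
    introduced to avoid flushing tiny fragments to TTS.
    """
    partial = ""
    for token in tokens:
        if not token:
            continue
        partial += token
        if (is_end(token) and len(partial) > 10) or len(partial) > 50:
            yield partial
            partial = ""
    if partial:
        yield partial  # trailing remainder, flagged end_of_segment in the extension


if __name__ == "__main__":
    chunks = ["Hello,", " this is a streamed", " answer.", " Short."]
    print(list(segment_stream(chunks)))
    # -> ['Hello, this is a streamed answer.', ' Short.']
```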
From 5694a11af44323b39ddaeab13e435b75ba0c7158 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Tue, 16 Jul 2024 22:25:32 +0800 Subject: [PATCH 43/72] tmp remove greetings from openai block --- .../openai_chatgpt_extension.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index 75ce1c7a8..fb6e801a7 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -172,15 +172,15 @@ def on_start(self, rte: Rte) -> None: logger.info(f"newOpenaiChatGPT failed, err: {err}") # Send greeting if available - if greeting: - try: - output_data = Data.create("text_data") - output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) - output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) - rte.send_data(output_data) - logger.info(f"greeting [{greeting}] sent") - except Exception as err: - logger.info(f"greeting [{greeting}] send failed, err: {err}") + # if greeting: + # try: + # output_data = Data.create("text_data") + # output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) + # output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + # rte.send_data(output_data) + # logger.info(f"greeting [{greeting}] sent") + # except Exception as err: + # logger.info(f"greeting [{greeting}] send failed, err: {err}") rte.on_start_done() def on_stop(self, rte: Rte) -> None: From d16eeadeb7426d5f2b2b4f89e8d5f543c80c0659 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Tue, 16 Jul 2024 22:28:38 +0800 Subject: [PATCH 44/72] fix: fix outdateTs issue --- agents/addon/extension/cosy_tts/main.py | 1 + .../openai_chatgpt_extension.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/agents/addon/extension/cosy_tts/main.py b/agents/addon/extension/cosy_tts/main.py index e9661f723..0bc2e52f8 100644 --- a/agents/addon/extension/cosy_tts/main.py +++ b/agents/addon/extension/cosy_tts/main.py @@ -107,6 +107,7 @@ def __init__(self, name: str): self.tts = None self.callback = None self.format = None + self.outdateTs = datetime.now() self.stopped = False self.thread = None diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index fb6e801a7..75ce1c7a8 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -172,15 +172,15 @@ def on_start(self, rte: Rte) -> None: logger.info(f"newOpenaiChatGPT failed, err: {err}") # Send greeting if available - # if greeting: - # try: - # output_data = Data.create("text_data") - # output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) - # output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) - # rte.send_data(output_data) - # logger.info(f"greeting [{greeting}] sent") - # except Exception as err: - # logger.info(f"greeting [{greeting}] send failed, err: {err}") + if greeting: + try: + output_data = Data.create("text_data") + output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) + output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + rte.send_data(output_data) + 
logger.info(f"greeting [{greeting}] sent") + except Exception as err: + logger.info(f"greeting [{greeting}] send failed, err: {err}") rte.on_start_done() def on_stop(self, rte: Rte) -> None: From 2e7e754f3e86f819a0cae0d88f306ede919b7c3a Mon Sep 17 00:00:00 2001 From: Ethan Zhang Date: Tue, 16 Jul 2024 23:52:11 +0800 Subject: [PATCH 45/72] Dev/fix cosy bug (#105) (#106) * fix cosy interrupt and safe exit * fix init Co-authored-by: tomasliu <67892682+tomasliu-agora@users.noreply.github.com> --- .../addon/extension/qwen_llm_python/main.py | 4 +- agents/manifest.json.qwen.example | 205 ++++++++++++++++++ 2 files changed, 207 insertions(+), 2 deletions(-) create mode 100644 agents/manifest.json.qwen.example diff --git a/agents/addon/extension/qwen_llm_python/main.py b/agents/addon/extension/qwen_llm_python/main.py index 41a0cab0b..e7c8da2c6 100644 --- a/agents/addon/extension/qwen_llm_python/main.py +++ b/agents/addon/extension/qwen_llm_python/main.py @@ -96,7 +96,7 @@ def call_with_stream(self, rte: Rte, ts :datetime.time, inputText: str, messages messages.append({'role':'user', 'content':inputText}) logger.info("before call %s %s", messages, ts) - responses = dashscope.Generation.call("qwen-max", + responses = dashscope.Generation.call(self.model, messages=messages, result_format='message', # set the result to be "message" format. stream=True, # set streaming output @@ -152,7 +152,7 @@ def on_init( def on_start(self, rte: Rte) -> None: logger.info("QWenLLMExtension on_start") self.api_key = rte.get_property_string("api_key") - self.mode = rte.get_property_string("model") + self.model = rte.get_property_string("model") self.prompt = rte.get_property_string("prompt") self.max_history = rte.get_property_int("max_memory_length") diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.qwen.example new file mode 100644 index 000000000..3361faa38 --- /dev/null +++ b/agents/manifest.json.qwen.example @@ -0,0 +1,205 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "python", + "dependencies": [ + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.2.0" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.2.0-alpha" + }, + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "llm", + "addon": "qwen_llm_python", + "name": "qwen_llm", + "property": { + "api_key": "", + "model": "qwen-max", + "max_tokens": 512, + "prompt": "", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "cosy_tts", + "name": "cosy_tts", + "property": { + "api_key": "", + "model": "cosyvoice-v1", + "voice": "longxiaochun", + "sample_rate": 16000 + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + 
}, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "llm" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ], + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "llm", + "extension": "qwen_llm" + } + ] + } + ] + }, + { + "extension_group": "llm", + "extension": "qwen_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "cosy_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "cosy_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + } + ] + } + ] +} From d73e73bf53a62382dcdba3c8d6ef470e1e385c16 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 17 Jul 2024 07:18:45 +0000 Subject: [PATCH 46/72] feat: use buf for data stream --- .../chat_transcriber_extension.py | 10 ++++++++-- agents/manifest.json.qwen.example | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py index aa2fcf0c6..a75d5f8b3 100644 --- a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py @@ -52,7 +52,7 @@ def on_deinit(self, rte: Rte) -> None: def on_cmd(self, rte: Rte, cmd: Cmd) -> None: logger.info("on_cmd") cmd_json = cmd.to_json() - logger.info("on_cmd json: " % cmd_json) + logger.info("on_cmd json: {}".format(cmd_json)) cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") @@ -138,7 +138,13 @@ def on_data(self, rte: Rte, data: Data) -> None: try: # convert the origin text data to the protobuf data and send it to the graph. 
rte_data = Data.create("data") - rte_data.set_property_string("data", pb_serialized_text) + # rte_data.set_property_string("data", pb_serialized_text) + rte_data.alloc_buf(len(pb_serialized_text)) + buf = rte_data.lock_buf() + buf[:] = pb_serialized_text[:] + rte_data.unlock_buf(buf) + rte.send_data(rte_data) + logger.info("data sent") except Exception as e: logger.warning(f"on_data new_data error: {e}") return diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.qwen.example index 3361faa38..cf785029b 100644 --- a/agents/manifest.json.qwen.example +++ b/agents/manifest.json.qwen.example @@ -12,7 +12,7 @@ { "type": "extension", "name": "agora_rtc", - "version": "0.2.0-alpha" + "version": "0.3.0-databuf" }, { "type": "system", From 4dc9f2a6f7e0f7c736855d7b202a1e92a05883f3 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 17 Jul 2024 07:46:17 +0000 Subject: [PATCH 47/72] fix: invalid argument --- .../chat_transcriber_python/chat_transcriber_extension.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py index a75d5f8b3..81b3e21bd 100644 --- a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py @@ -149,9 +149,6 @@ def on_data(self, rte: Rte, data: Data) -> None: logger.warning(f"on_data new_data error: {e}") return - rte.send_data(rte_data) - - @register_addon_as_extension("chat_transcriber_python") class ChatTranscriberExtensionAddon(Addon): def on_init(self, rte: Rte, manifest, property) -> None: From 4c092b4aec34620b798ceb5bc312d7b86f8c53fb Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 17 Jul 2024 07:47:44 +0000 Subject: [PATCH 48/72] feat: add chat_transcriber in graph --- agents/manifest.json.qwen.example | 56 ++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.qwen.example index cf785029b..5a28a16ee 100644 --- a/agents/manifest.json.qwen.example +++ b/agents/manifest.json.qwen.example @@ -72,6 +72,12 @@ "sample_rate": 16000 } }, + { + "type": "extension", + "extension_group": "chat_transcriber", + "addon": "chat_transcriber_python", + "name": "chat_transcriber" + }, { "type": "extension", "extension_group": "default", @@ -92,6 +98,11 @@ "type": "extension_group", "addon": "default_extension_group", "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chat_transcriber" } ], "connections": [ @@ -105,6 +116,10 @@ { "extension_group": "default", "extension": "interrupt_detector" + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber" } ] } @@ -157,7 +172,31 @@ { "extension_group": "tts", "extension": "cosy_tts" - } + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] + } ] } ], @@ -198,6 +237,21 @@ ] } ] + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest":[ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] } ] } From 
7f7972ac4feb9745a328d49d4bc201428fb55ce2 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Wed, 17 Jul 2024 18:19:37 +0800 Subject: [PATCH 49/72] feat: support json dump for chat transcriber --- .../chat_transcriber_extension.py | 12 +++++++++--- agents/property.json | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py index 81b3e21bd..eae3d1b49 100644 --- a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py @@ -6,6 +6,7 @@ # # +import json from rte_runtime_python import ( Addon, Extension, @@ -130,7 +131,12 @@ def on_data(self, rte: Rte, data: Data) -> None: ) try: - pb_serialized_text = pb_text.SerializeToString() + text = json.dumps({ + "uid": stream_id, + "text" :text, + "is_final": end_of_segment, + }) + text_buf = text.encode("utf-8") except Exception as e: logger.warning(f"on_data SerializeToString error: {e}") return @@ -139,9 +145,9 @@ def on_data(self, rte: Rte, data: Data) -> None: # convert the origin text data to the protobuf data and send it to the graph. rte_data = Data.create("data") # rte_data.set_property_string("data", pb_serialized_text) - rte_data.alloc_buf(len(pb_serialized_text)) + rte_data.alloc_buf(len(text_buf)) buf = rte_data.lock_buf() - buf[:] = pb_serialized_text[:] + buf[:] = text_buf[:] rte_data.unlock_buf(buf) rte.send_data(rte_data) logger.info("data sent") diff --git a/agents/property.json b/agents/property.json index 03c192fc2..25c3d21a6 100644 --- a/agents/property.json +++ b/agents/property.json @@ -1,5 +1,5 @@ { "rte": { - "log_level": 3 + "log_level": 1 } } From 964d6267701472c891f45969baa9cfbf351ac531 Mon Sep 17 00:00:00 2001 From: zhangqianze Date: Thu, 18 Jul 2024 14:33:58 +0800 Subject: [PATCH 50/72] fix: fix cosy_tts websocket closed issue --- agents/addon/extension/cosy_tts/main.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/agents/addon/extension/cosy_tts/main.py b/agents/addon/extension/cosy_tts/main.py index 0bc2e52f8..d068b4f27 100644 --- a/agents/addon/extension/cosy_tts/main.py +++ b/agents/addon/extension/cosy_tts/main.py @@ -5,6 +5,7 @@ # Copyright (c) 2024 Agora IO. All rights reserved. 
# # +import traceback from rte_runtime_python import ( Addon, Extension, @@ -37,6 +38,7 @@ def __init__(self, rte: Rte, sample_rate: int): self.sample_rate = sample_rate self.frame_size = int(self.sample_rate * 1 * 2 / 100) self.canceled = False + self.closed = False def on_open(self): logger.info("websocket is open.") @@ -49,10 +51,11 @@ def on_error(self, message: str): def on_close(self): logger.info("websocket is closed.") + self.closed = True def on_event(self, message): pass - #logger.info(f"recv speech synthsis message {message}") + # logger.info(f"recv speech synthsis message {message}") def get_frame(self, data: bytes) -> PcmFrame: f = PcmFrame.create("pcm_frame") @@ -180,7 +183,10 @@ def async_handle(self, rte: Rte): if len(inputText) == 0: logger.warning("empty input for interrupt") if tts is not None: - tts.streaming_cancel() + try: + tts.streaming_cancel() + except Exception as e: + logger.exception(e) if callback is not None: callback.cancel() tts = None @@ -190,6 +196,9 @@ def async_handle(self, rte: Rte): if self.need_interrupt(ts): continue + if callback is not None and callback.closed is True: + tts = None + if tts is None: logger.info("creating tts") callback = CosyTTSCallback(rte, self.sample_rate) @@ -199,6 +208,7 @@ def async_handle(self, rte: Rte): tts.streaming_call(inputText) except Exception as e: logger.exception(e) + logger.exception(traceback.format_exc()) finally: if tts is not None: tts.streaming_complete() From 1f98029da992d7a398d4ae9d0bed1e11e809e3ad Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Mon, 22 Jul 2024 06:37:37 +0000 Subject: [PATCH 51/72] feat: lock rtc ext version --- agents/manifest.json.qwen.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.qwen.example index 5a28a16ee..989d70160 100644 --- a/agents/manifest.json.qwen.example +++ b/agents/manifest.json.qwen.example @@ -12,7 +12,7 @@ { "type": "extension", "name": "agora_rtc", - "version": "0.3.0-databuf" + "version": "=0.3.0-databuf" }, { "type": "system", From 65b5a15e9394434ae4789f670cac73f81f663d37 Mon Sep 17 00:00:00 2001 From: Bin Chen Date: Wed, 24 Jul 2024 11:11:02 +0800 Subject: [PATCH 52/72] bedrock_llm_extension: add support for Amazon Bedrock Foundation Models (#111) Co-authored-by: Chen188 --- .../extension/bedrock_llm_python/README.md | 12 + .../extension/bedrock_llm_python/__init__.py | 4 + .../bedrock_llm_python/bedrock_llm.py | 75 ++++ .../bedrock_llm_extension.py | 354 +++++++++++++++ .../addon/extension/bedrock_llm_python/log.py | 13 + .../bedrock_llm_python/manifest.json | 68 +++ .../bedrock_llm_python/property.json | 1 + .../bedrock_llm_python/requirements.txt | 4 + agents/manifest.json.cn.bedrock.example | 200 +++++++++ agents/manifest.json.en.bedrock.example | 200 +++++++++ bedrock_llm/bedrock_llm_extension.go | 402 ++++++++++++++++++ server/main.go | 20 + 12 files changed, 1353 insertions(+) create mode 100644 agents/addon/extension/bedrock_llm_python/README.md create mode 100644 agents/addon/extension/bedrock_llm_python/__init__.py create mode 100644 agents/addon/extension/bedrock_llm_python/bedrock_llm.py create mode 100644 agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py create mode 100644 agents/addon/extension/bedrock_llm_python/log.py create mode 100644 agents/addon/extension/bedrock_llm_python/manifest.json create mode 100644 agents/addon/extension/bedrock_llm_python/property.json create mode 100644 agents/addon/extension/bedrock_llm_python/requirements.txt create 
diff --git a/agents/addon/extension/bedrock_llm_python/README.md b/agents/addon/extension/bedrock_llm_python/README.md new file mode 100644 index 000000000..7d9bd7139 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/README.md @@ -0,0 +1,12 @@ +## Amazon Bedrock LLM Extension + +### Configurations + +You can configure this extension by providing the following environment variables: + +| Env | Required | Default | Notes | +| -- | -- | -- | -- | +| AWS_REGION | No | us-east-1 | The Region of the Amazon Bedrock service you want to use. | +| AWS_ACCESS_KEY_ID | No | - | Access Key of your IAM User; make sure you've set proper permissions to [invoke Bedrock models](https://docs.aws.amazon.com/bedrock/latest/userguide/security_iam_id-based-policy-examples.html) and gained [model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) in Bedrock. The default credentials provider is used if not provided; check the [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | +| AWS_SECRET_ACCESS_KEY | No | - | Secret Key of your IAM User; make sure you've set proper permissions to [invoke Bedrock models](https://docs.aws.amazon.com/bedrock/latest/userguide/security_iam_id-based-policy-examples.html) and gained [model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) in Bedrock. The default credentials provider is used if not provided; check the [document](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html). | +| AWS_BEDROCK_MODEL | No | Claude 3.5 (anthropic.claude-3-5-sonnet-20240620-v1:0) | Bedrock model id, check the [document](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns). | \ No newline at end of file
diff --git a/agents/addon/extension/bedrock_llm_python/__init__.py b/agents/addon/extension/bedrock_llm_python/__init__.py new file mode 100644 index 000000000..af911d6c6 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/__init__.py @@ -0,0 +1,4 @@ +from . import bedrock_llm_extension +from .log import logger + +logger.info("bedrock_llm_extension extension loaded")
diff --git a/agents/addon/extension/bedrock_llm_python/bedrock_llm.py b/agents/addon/extension/bedrock_llm_python/bedrock_llm.py new file mode 100644 index 000000000..c833f7b7c --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/bedrock_llm.py @@ -0,0 +1,75 @@ +import boto3 +from .log import logger + +class BedrockLLMConfig: + def __init__(self, + region: str, + access_key: str, + secret_key: str, + model: str, + prompt: str, + top_p: float, + temperature: float, + max_tokens: int): + self.region = region + self.access_key = access_key + self.secret_key = secret_key + self.model = model + self.prompt = prompt + self.top_p = top_p + self.temperature = temperature + self.max_tokens = max_tokens + + @classmethod + def default_config(cls): + return cls( + region="us-east-1", + access_key="", + secret_key="", + model="anthropic.claude-3-5-sonnet-20240620-v1:0", # Defaults to Claude 3.5, supported model list: https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html + # system prompt + prompt="You are a voice assistant who talks in a conversational way and can chat with me like my friends.
I will speak to you in English or Chinese, and you will answer in the corrected and improved version of my text with the language I use. Don’t talk like a robot, instead I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don’t return me any meaningless characters. I want you to be helpful, when I’m asking you for advice, give me precise, practical and useful advice instead of being vague. When giving me a list of options, express the options in a narrative way instead of bullet points.", + top_p=1.0, + temperature=0.1, + max_tokens=512, + ) + +class BedrockLLM: + client = None + def __init__(self, config: BedrockLLMConfig): + self.config = config + + if config.access_key and config.secret_key: + logger.info(f"BedrockLLM initialized with access key: {config.access_key}") + + self.client = boto3.client(service_name='bedrock-runtime', + region_name=config.region, + aws_access_key_id=config.access_key, + aws_secret_access_key=config.secret_key) + else: + logger.info(f"BedrockLLM initialized without access key, using default credentials provider chain.") + self.client = boto3.client(service_name='bedrock-runtime', region_name=config.region) + + def get_converse_stream(self, messages): + bedrock_req_params = { + "modelId": self.config.model, + "messages": messages, + "inferenceConfig": { + "temperature": self.config.temperature, + "maxTokens": self.config.max_tokens, + "topP": self.config.top_p, + # "stopSequences": [], + }, + # "additionalModelRequestFields": additional_model_fields, + } + + if self.config.prompt: + bedrock_req_params['system'] = [ + {'text': self.config.prompt} + ] + + try: + response = self.client.converse_stream(**bedrock_req_params) + return response + except Exception as e: + raise Exception(f"GetConverseStream failed, err: {e}") \ No newline at end of file diff --git a/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py new file mode 100644 index 000000000..43153cd8d --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py @@ -0,0 +1,354 @@ +from .bedrock_llm import BedrockLLM, BedrockLLMConfig +from datetime import datetime +from threading import Thread +from rte_runtime_python import ( + Addon, + Extension, + register_addon_as_extension, + Rte, + Cmd, + Data, + StatusCode, + CmdResult, + MetadataInfo, + RTE_PIXEL_FMT, +) +from rte_runtime_python.image_frame import ImageFrame +from PIL import Image, ImageFilter +from .log import logger + + +CMD_IN_FLUSH = "flush" +CMD_OUT_FLUSH = "flush" +DATA_IN_TEXT_DATA_PROPERTY_TEXT = "text" +DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL = "is_final" +DATA_OUT_TEXT_DATA_PROPERTY_TEXT = "text" +DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT = "end_of_segment" + +PROPERTY_REGION = "region" # Optional +PROPERTY_ACCESS_KEY = "access_key" # Optional +PROPERTY_SECRET_KEY = "secret_key" # Optional +PROPERTY_MODEL = "model" # Optional +PROPERTY_PROMPT = "prompt" # Optional +PROPERTY_TEMPERATURE = "temperature" # Optional +PROPERTY_TOP_P = "top_p" # Optional +PROPERTY_MAX_TOKENS = "max_tokens" # Optional +PROPERTY_GREETING = "greeting" # Optional +PROPERTY_MAX_MEMORY_LENGTH = "max_memory_length" # Optional + +def get_current_time(): + # Get the current time + start_time = datetime.now() + # Get the number of microseconds since the Unix epoch + unix_microseconds = int(start_time.timestamp() * 1_000_000) + return unix_microseconds + +def is_punctuation(char): + if char in [',', ',', '.', 
'。', '?', '?', '!', '!']: + return True + return False + +def parse_sentence(sentence, content): + remain = "" + found_punc = False + + for char in content: + if not found_punc: + sentence += char + else: + remain += char + + if not found_punc and is_punctuation(char): + found_punc = True + + return sentence, remain, found_punc + +class BedrockLLMExtension(Extension): + memory = [] + max_memory_length = 10 + outdate_ts = 0 + bedrock_llm = None + + def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + logger.info("BedrockLLMExtension on_init") + rte.on_init_done(manifest, property) + + def on_start(self, rte: Rte) -> None: + logger.info("BedrockLLMExtension on_start") + # Prepare configuration + bedrock_llm_config = BedrockLLMConfig.default_config() + + try: + region = rte.get_property_string(PROPERTY_REGION) + if region: + bedrock_llm_config.region = region + except Exception as err: + logger.debug(f"GetProperty optional {PROPERTY_REGION} failed, err: {err}. Using default value: {bedrock_llm_config.region}") + + try: + access_key = rte.get_property_string(PROPERTY_ACCESS_KEY) + bedrock_llm_config.access_key = access_key + except Exception as err: + logger.error(f"GetProperty optional {PROPERTY_ACCESS_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.access_key}") + + try: + secret_key = rte.get_property_string(PROPERTY_SECRET_KEY) + bedrock_llm_config.secret_key = secret_key + except Exception as err: + logger.error(f"GetProperty optional {PROPERTY_SECRET_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.secret_key}") + + try: + model = rte.get_property_string(PROPERTY_MODEL) + if model: + bedrock_llm_config.model = model + except Exception as err: + logger.debug(f"GetProperty optional {PROPERTY_MODEL} error: {err}. Using default value: {bedrock_llm_config.model}") + + try: + prompt = rte.get_property_string(PROPERTY_PROMPT) + if prompt: + bedrock_llm_config.prompt = prompt + except Exception as err: + logger.debug(f"GetProperty optional {PROPERTY_PROMPT} error: {err}. Using default value: {bedrock_llm_config.prompt}") + + try: + temperature = rte.get_property_float(PROPERTY_TEMPERATURE) + bedrock_llm_config.temperature = float(temperature) + except Exception as err: + logger.debug(f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}. Using default value: {bedrock_llm_config.temperature}") + + try: + top_p = rte.get_property_float(PROPERTY_TOP_P) + bedrock_llm_config.top_p = float(top_p) + except Exception as err: + logger.debug(f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}. Using default value: {bedrock_llm_config.top_p}") + + try: + max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS) + if max_tokens > 0: + bedrock_llm_config.max_tokens = int(max_tokens) + except Exception as err: + logger.debug(f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}. Using default value: {bedrock_llm_config.max_tokens}")
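The parse_sentence helper defined above is what turns a raw delta stream into TTS-sized chunks: it accumulates characters into sentence until the first punctuation mark, then diverts the rest into remain. A quick trace of the expected behavior (hypothetical inputs):

    sentence, remain, final = parse_sentence("", "Hi there! How")
    assert (sentence, remain, final) == ("Hi there!", " How", True)

    # no punctuation yet: everything stays in the accumulating sentence
    sentence, remain, final = parse_sentence("How", " are you")
    assert (sentence, remain, final) == ("How are you", "", False)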
Using default value: {bedrock_llm_config.max_tokens}") + + try: + greeting = rte.get_property_string(PROPERTY_GREETING) + except Exception as err: + logger.debug(f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}.") + + try: + prop_max_memory_length = rte.get_property_int(PROPERTY_MAX_MEMORY_LENGTH) + if prop_max_memory_length > 0: + self.max_memory_length = int(prop_max_memory_length) + except Exception as err: + logger.debug(f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}.") + + # Create bedrockLLM instance + try: + self.bedrock_llm = BedrockLLM(bedrock_llm_config) + logger.info(f"newBedrockLLM succeed with max_tokens: {bedrock_llm_config.max_tokens}, model: {bedrock_llm_config.model}") + except Exception as err: + logger.info(f"newBedrockLLM failed, err: {err}") + + # Send greeting if available + if greeting: + try: + output_data = Data.create("text_data") + output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) + output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + rte.send_data(output_data) + logger.info(f"greeting [{greeting}] sent") + except Exception as err: + logger.info(f"greeting [{greeting}] send failed, err: {err}") + rte.on_start_done() + + def on_stop(self, rte: Rte) -> None: + logger.info("BedrockLLMExtension on_stop") + rte.on_stop_done() + + def on_deinit(self, rte: Rte) -> None: + logger.info("BedrockLLMExtension on_deinit") + rte.on_deinit_done() + + def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + logger.info("BedrockLLMExtension on_cmd") + cmd_json = cmd.to_json() + logger.info("BedrockLLMExtension on_cmd json: " + cmd_json) + + cmd_name = cmd.get_name() + + if cmd_name == CMD_IN_FLUSH: + self.outdate_ts = get_current_time() + cmd_out = Cmd.create(CMD_OUT_FLUSH) + rte.send_cmd(cmd_out, None) + logger.info(f"BedrockLLMExtension on_cmd sent flush") + else: + logger.info(f"BedrockLLMExtension on_cmd unknown cmd: {cmd_name}") + cmd_result = CmdResult.create(StatusCode.ERROR) + cmd_result.set_property_string("detail", "unknown cmd") + rte.return_result(cmd_result, cmd) + return + + cmd_result = CmdResult.create(StatusCode.OK) + cmd_result.set_property_string("detail", "success") + rte.return_result(cmd_result, cmd) + + def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: + logger.info("BedrockLLMExtension on_cmd") + + def on_data(self, rte: Rte, data: Data) -> None: + """ + on_data receives data from rte graph. + current supported data: + - name: text_data + example: + {name: text_data, properties: {text: "hello"} + """ + logger.info(f"BedrockLLMExtension on_data") + + # Assume 'data' is an object from which we can get properties + try: + is_final = data.get_property_bool(DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL) + if not is_final: + logger.info("ignore non-final input") + return + except Exception as err: + logger.info(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") + return + + # Get input text + try: + input_text = data.get_property_string(DATA_IN_TEXT_DATA_PROPERTY_TEXT) + if not input_text: + logger.info("ignore empty text") + return + logger.info(f"OnData input text: [{input_text}]") + except Exception as err: + logger.info(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") + return + + # Prepare memory. 
+        # Prepare memory. A conversation must alternate between user and assistant roles + while len(self.memory): + if len(self.memory) > self.max_memory_length: + logger.debug(f'pop out first message, reason: memory length limit: `{self.memory[0]}`') + self.memory.pop(0) + elif self.memory[0]['role'] == 'assistant': + logger.debug(f'pop out first message, reason: messages can not start with assistant: `{self.memory[0]}`') + self.memory.pop(0) + else: + break + + if len(self.memory) and self.memory[-1]['role'] == 'user': + # if last user input got empty response, append current user input. + logger.debug(f'found last message with role `user`, will append this input into last user input') + self.memory[-1]['content'].append( + {'text': input_text} + ) + else: + self.memory.append({"role": "user", "content": [ + {'text': input_text} + ]}) + + def converse_stream_worker(start_time, input_text, memory): + try: + logger.info(f"GetConverseStream for input text: [{input_text}] memory: {memory}") + + # Get result from Bedrock + resp = self.bedrock_llm.get_converse_stream(memory) + if resp is None or resp.get('stream') is None: + logger.info(f"GetConverseStream for input text: [{input_text}] failed") + return + + stream = resp.get('stream') + sentence = "" + full_content = "" + first_sentence_sent = False + + for event in stream: + if start_time < self.outdate_ts: + logger.info(f"GetConverseStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}") + break + + if 'contentBlockDelta' in event: + delta_types = event['contentBlockDelta']['delta'].keys() + # ignore other types of content: e.g. toolUse + if 'text' in delta_types: + content = event['contentBlockDelta']['delta']['text'] + else: + continue + elif 'internalServerException' in event or 'modelStreamErrorException' in event \ + or 'throttlingException' in event or 'validationException' in event: + logger.error(f"GetConverseStream Error occurred: {event}") + break + else: + # ignore other events + continue + + full_content += content + + while True: + sentence, content, sentence_is_final = parse_sentence(sentence, content) + if len(sentence) == 0 or not sentence_is_final: + logger.info(f"sentence {sentence} is empty or not final") + break + logger.info(f"GetConverseStream recv for input text: [{input_text}] got sentence: [{sentence}]") + + # send sentence + try: + output_data = Data.create("text_data") + output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) + output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False) + rte.send_data(output_data) + logger.info(f"GetConverseStream recv for input text: [{input_text}] sent sentence [{sentence}]") + except Exception as err: + logger.info(f"GetConverseStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}") + break + + sentence = "" + if not first_sentence_sent: + first_sentence_sent = True + logger.info(f"GetConverseStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms") + + if len(full_content.strip()): + # remember response as assistant content in memory + memory.append({"role": "assistant", "content": [{"text": full_content}]}) + else: + # can not put empty model response into memory + logger.error(f"GetConverseStream recv for input text: [{input_text}] failed: empty response [{full_content}]") + return + + # send end of segment + try: + output_data = Data.create("text_data") + output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) + output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + rte.send_data(output_data) + logger.info(f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent") + except Exception as err: + logger.info(f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}") + + except Exception as e: + logger.info(f"GetConverseStream for input text: [{input_text}] failed, err: {e}") + + # Start thread to request and read responses from Bedrock + start_time = get_current_time() + thread = Thread(target=converse_stream_worker, args=(start_time, input_text, self.memory)) + thread.start() + logger.info("BedrockLLMExtension on_data end")
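A detail worth comparing with the Go port later in this patch: converse_stream_worker receives self.memory itself, so the assistant reply it appends lands directly in the shared history, while the goroutine version hands each worker a copy of the slice and funnels replies back through a channel. A queue-based sketch of that second design in Python (worker body elided, names hypothetical):

    import queue
    from threading import Thread

    memory_chan: "queue.Queue[dict]" = queue.Queue()

    def worker(messages):
        full_content = "assistant reply"  # placeholder for the streamed result
        memory_chan.put({"role": "assistant",
                         "content": [{"text": full_content}]})

    def on_new_user_input(memory, text):
        while not memory_chan.empty():    # drain replies from finished workers
            memory.append(memory_chan.get())
        memory.append({"role": "user", "content": [{"text": text}]})
        Thread(target=worker, args=(list(memory),)).start()  # copy, not the live list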
+ +@register_addon_as_extension("bedrock_llm_python") +class BedrockLLMExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("BedrockLLMExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + rte.on_create_instance_done(BedrockLLMExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("BedrockLLMExtensionAddon on_deinit") + rte.on_deinit_done() + return
diff --git a/agents/addon/extension/bedrock_llm_python/log.py b/agents/addon/extension/bedrock_llm_python/log.py new file mode 100644 index 000000000..7261cd015 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/log.py @@ -0,0 +1,13 @@ +import logging + +logger = logging.getLogger("bedrock_llm_python") +logger.setLevel(logging.INFO) + +formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(process)d - [%(filename)s:%(lineno)d] - %(message)s" +) + +console_handler = logging.StreamHandler() +console_handler.setFormatter(formatter) + +logger.addHandler(console_handler)
diff --git a/agents/addon/extension/bedrock_llm_python/manifest.json b/agents/addon/extension/bedrock_llm_python/manifest.json new file mode 100644 index 000000000..bd876e4fd --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/manifest.json @@ -0,0 +1,68 @@ +{ + "type": "extension", + "name": "bedrock_llm_python", + "version": "0.1.0", + "language": "python", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "api": { + "property": { + "access_key": { + "type": "string" + }, + "secret_key": { + "type": "string" + }, + "model": { + "type": "string" + }, + "max_tokens": { + "type": "int64" + }, + "prompt": { + "type": "string" + }, + "greeting": { + "type": "string" + }, + "max_memory_length": { + "type": "int64" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ] + } +} \ No newline at end of file
diff --git a/agents/addon/extension/bedrock_llm_python/property.json b/agents/addon/extension/bedrock_llm_python/property.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/bedrock_llm_python/requirements.txt
b/agents/addon/extension/bedrock_llm_python/requirements.txt new file mode 100644 index 000000000..40ddd4741 --- /dev/null +++ b/agents/addon/extension/bedrock_llm_python/requirements.txt @@ -0,0 +1,4 @@ +pillow==10.4.0 +# openai==1.35.13 +# requests==2.32.3 +boto3==1.34.143 \ No newline at end of file diff --git a/agents/manifest.json.cn.bedrock.example b/agents/manifest.json.cn.bedrock.example new file mode 100644 index 000000000..7963b536b --- /dev/null +++ b/agents/manifest.json.cn.bedrock.example @@ -0,0 +1,200 @@ +{ + "type": "app", + "name": "astra_agents", + "version": "0.2.0", + "language": "python", + "dependencies": [ + { + "type": "extension_group", + "name": "default_extension_group", + "version": "0.2.0" + }, + { + "type": "extension", + "name": "agora_rtc", + "version": "0.2.1" + }, + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.2.0" + } + ], + "predefined_graphs": [ + { + "name": "astra_agents", + "auto_start": true, + "nodes": [ + { + "type": "extension", + "extension_group": "default", + "addon": "agora_rtc", + "name": "agora_rtc", + "property": { + "app_id": "", + "token": "", + "channel": "astra_agents_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": true, + "agora_asr_vendor_name": "microsoft", + "agora_asr_language": "en-US", + "agora_asr_vendor_key": "", + "agora_asr_vendor_region": "", + "agora_asr_session_control_file_path": "session_control.conf" + } + }, + { + "type": "extension", + "extension_group": "bedrock", + "addon": "bedrock_llm_python", + "name": "bedrock_llm", + "property": { + "region": "us-east-1", + "access_key": "", + "secret_key": "", + "model": "anthropic.claude-3-5-sonnet-20240620-v1:0", + "max_tokens": 512, + "prompt": "", + "greeting": "ASTRA agent connected. 
How can I help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "", + "azure_subscription_region": "", + "azure_synthesis_voice_name": "en-US-JaneNeural" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "bedrock" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + } + ] + } + ] +}
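Both example manifests ship with empty credentials on the bedrock_llm node; the server/main.go hunk at the end of this patch fills them in from the AWS_* environment variables at startup. For local experiments the same substitution can be done by hand, along these lines (file names assumed from the examples in this patch):

    import json, os

    with open("agents/manifest.json.en.bedrock.example") as f:
        manifest = json.load(f)

    for node in manifest["predefined_graphs"][0]["nodes"]:
        if node.get("name") == "bedrock_llm":
            node["property"]["region"] = os.environ.get("AWS_REGION", "us-east-1")
            node["property"]["access_key"] = os.environ.get("AWS_ACCESS_KEY_ID", "")
            node["property"]["secret_key"] = os.environ.get("AWS_SECRET_ACCESS_KEY", "")

    with open("agents/manifest.json", "w") as f:
        json.dump(manifest, f, indent=2)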
"greeting": "ASTRA agent connected. How can i help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "extension_group": "tts", + "addon": "azure_tts", + "name": "azure_tts", + "property": { + "azure_subscription_key": "", + "azure_subscription_region": "", + "azure_synthesis_voice_name": "en-US-JaneNeural" + } + }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector_python", + "name": "interrupt_detector" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "default" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "bedrock" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "tts" + } + ], + "connections": [ + { + "extension_group": "default", + "extension": "agora_rtc", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "default", + "extension": "interrupt_detector" + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ], + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + }, + { + "extension_group": "bedrock", + "extension": "bedrock_llm", + "data": [ + { + "name": "text_data", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "tts", + "extension": "azure_tts" + } + ] + } + ] + }, + { + "extension_group": "tts", + "extension": "azure_tts", + "pcm_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ], + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, + { + "extension_group": "default", + "extension": "interrupt_detector", + "cmd": [ + { + "name": "flush", + "dest": [ + { + "extension_group": "bedrock", + "extension": "bedrock_llm" + } + ] + } + ] + } + ] + } + ] +} diff --git a/bedrock_llm/bedrock_llm_extension.go b/bedrock_llm/bedrock_llm_extension.go new file mode 100644 index 000000000..7c21d7066 --- /dev/null +++ b/bedrock_llm/bedrock_llm_extension.go @@ -0,0 +1,402 @@ +/** + * + * Agora Real Time Engagement + * Created by lixinhui in 2024. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +// Note that this is just an example extension written in the GO programming +// language, so the package name does not equal to the containing directory +// name. However, it is not common in Go. 
+package extension + +import ( + "fmt" + "log/slog" + "sync" + "sync/atomic" + "time" + + "agora.io/rte/rtego" + "github.com/aws/aws-sdk-go-v2/service/bedrockruntime/types" +) + +var ( + logTag = slog.String("extension", "BEDROCK_LLM_EXTENSION") +) + +type bedrockLLMExtension struct { + rtego.DefaultExtension + bedrockLLM *bedrockLLM +} + +const ( + cmdInFlush = "flush" + cmdOutFlush = "flush" + dataInTextDataPropertyText = "text" + dataInTextDataPropertyIsFinal = "is_final" + dataOutTextDataPropertyText = "text" + dataOutTextDataPropertyTextEndOfSegment = "end_of_segment" + + propertyRegion = "region" // Optional + propertyAccessKey = "access_key" // Required + propertySecretKey = "secret_key" // Required + propertyModel = "model" // Optional + propertyPrompt = "prompt" // Optional + propertyTemperature = "temperature" // Optional + propertyTopP = "top_p" // Optional + propertyMaxTokens = "max_tokens" // Optional + propertyGreeting = "greeting" // Optional + propertyMaxMemoryLength = "max_memory_length" // Optional +) + +var ( + memory []types.Message + memoryChan chan types.Message + maxMemoryLength = 10 + + outdateTs atomic.Int64 + wg sync.WaitGroup +) + +func newBedrockLLMExtension(name string) rtego.Extension { + return &bedrockLLMExtension{} +} + +// OnStart will be called when the extension is starting, +// properties can be read here to initialize and start the extension. +// current supported properties: +// - region (optional) +// - access_key (required) +// - secret_key (required) +// - model +// - prompt +// - temperature +// - top_p +// - max_tokens +// - greeting +// - max_memory_length +func (p *bedrockLLMExtension) OnStart(rte rtego.Rte) { + slog.Info("OnStart", logTag) + + // prepare configuration + bedrockLLMConfig := defaultBedrockLLMConfig() + + if accessKey, err := rte.GetPropertyString(propertyAccessKey); err != nil { + slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyAccessKey, err), logTag) + } else { + if len(accessKey) > 0 { + bedrockLLMConfig.AccessKey = accessKey + } + } + if secretKey, err := rte.GetPropertyString(propertySecretKey); err != nil { + slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertySecretKey, err), logTag) + } else { + if len(secretKey) > 0 { + bedrockLLMConfig.SecretKey = secretKey + } + } + + if model, err := rte.GetPropertyString(propertyModel); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyModel, err), logTag) + } else { + if len(model) > 0 { + bedrockLLMConfig.Model = model + } + } + + if prompt, err := rte.GetPropertyString(propertyPrompt); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyPrompt, err), logTag) + } else { + if len(prompt) > 0 { + bedrockLLMConfig.Prompt = prompt + } + } + + if temperature, err := rte.GetPropertyFloat64(propertyTemperature); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTemperature, err), logTag) + } else { + bedrockLLMConfig.Temperature = float32(temperature) + } + + if topP, err := rte.GetPropertyFloat64(propertyTopP); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTopP, err), logTag) + } else { + bedrockLLMConfig.TopP = float32(topP) + } + + if maxTokens, err := rte.GetPropertyInt64(propertyMaxTokens); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxTokens, err), logTag) + } else { + if maxTokens > 0 { + bedrockLLMConfig.MaxTokens = int32(maxTokens) + } + } + + greeting, err :=
rte.GetPropertyString(propertyGreeting) + if err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyGreeting, err), logTag) + } + + if propMaxMemoryLength, err := rte.GetPropertyInt64(propertyMaxMemoryLength); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxMemoryLength, err), logTag) + } else { + if propMaxMemoryLength > 0 { + maxMemoryLength = int(propMaxMemoryLength) + } + } + + // create bedrockLLM instance + bedrockLLM, err := newBedrockLLM(bedrockLLMConfig) + if err != nil { + slog.Error(fmt.Sprintf("newBedrockLLM failed, err: %v", err), logTag) + return + } + slog.Info(fmt.Sprintf("newBedrockLLM succeed with max_tokens: %d, model: %s", + bedrockLLMConfig.MaxTokens, bedrockLLMConfig.Model), logTag) + + p.bedrockLLM = bedrockLLM + + memoryChan = make(chan types.Message, maxMemoryLength*2) + + // send greeting if available + if len(greeting) > 0 { + outputData, _ := rtego.NewData("text_data") + outputData.SetProperty(dataOutTextDataPropertyText, greeting) + outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) + if err := rte.SendData(outputData); err != nil { + slog.Error(fmt.Sprintf("greeting [%s] send failed, err: %v", greeting, err), logTag) + } else { + slog.Info(fmt.Sprintf("greeting [%s] sent", greeting), logTag) + } + } + + rte.OnStartDone() +} + +// OnCmd receives cmd from rte graph. +// current supported cmd: +// - name: flush +// example: +// {"name": "flush"} +func (p *bedrockLLMExtension) OnCmd( + rte rtego.Rte, + cmd rtego.Cmd, +) { + cmdName, err := cmd.GetName() + if err != nil { + result, fatal := rtego.NewCmdResult(rtego.Error) + + if fatal != nil { + slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) + } + rte.ReturnResult(result, cmd) + return + } + slog.Info(fmt.Sprintf("OnCmd %s", cmdName), logTag) + + switch cmdName { + case cmdInFlush: + outdateTs.Store(time.Now().UnixMicro()) + + wg.Wait() // wait for the in-flight converse stream to finish + + // send out + outCmd, err := rtego.NewCmd(cmdOutFlush) + if err != nil { + result, fatal := rtego.NewCmdResult(rtego.Error) + + if fatal != nil { + slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) + } + rte.ReturnResult(result, cmd) + return + } + if err := rte.SendCmd(outCmd, nil); err != nil { + result, fatal := rtego.NewCmdResult(rtego.Error) + + if fatal != nil { + slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) + } + rte.ReturnResult(result, cmd) + return + } else { + slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) + } + } + + result, _ := rtego.NewCmdResult(rtego.Ok) + rte.ReturnResult(result, cmd) +} +
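The flush path above is the barge-in mechanism shared by both ports: store an interrupt timestamp, wait for the in-flight stream via wg.Wait(), then forward flush downstream; every streaming worker compares its start time against the stored timestamp and stops as soon as it is stale. The pattern in miniature (Python, mirroring the outdate_ts check in the Python extension):

    import time

    outdate_ts = 0  # interrupt marker, microseconds since the epoch

    def on_flush():
        global outdate_ts
        outdate_ts = int(time.time() * 1_000_000)  # anything started earlier is now stale

    def stream_worker(start_ts, chunks):
        for chunk in chunks:
            if start_ts < outdate_ts:              # user barged in: drop the rest
                return
            print(chunk, end="")                   # stand-in for "send sentence downstream"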
+// OnData receives data from rte graph. +// current supported data: +// - name: text_data +// example: +// {"name": "text_data", "properties": {"text": "hello", "is_final": true}} +func (p *bedrockLLMExtension) OnData( + rte rtego.Rte, + data rtego.Data, +) { + // Get isFinal + isFinal, err := data.GetPropertyBool(dataInTextDataPropertyIsFinal) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyIsFinal, err), logTag) + return + } + if !isFinal { // ignore non-final + slog.Debug("ignore non-final input", logTag) + return + } + + // Get input text + inputText, err := data.GetPropertyString(dataInTextDataPropertyText) + if err != nil { + slog.Error(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag) + return + } + if len(inputText) == 0 { + slog.Debug("ignore empty text", logTag) + return + } + slog.Info(fmt.Sprintf("OnData input text: [%s]", inputText), logTag) + + // prepare memory + for len(memoryChan) > 0 { + m, ok := <-memoryChan + if !ok { + break + } + memory = append(memory, m) + if len(memory) > maxMemoryLength { + memory = memory[1:] + } + } + + memory = append(memory, types.Message{ + Role: types.ConversationRoleUser, + Content: []types.ContentBlock{ + &types.ContentBlockMemberText{ + Value: inputText, + }, + }, + }) + + if len(memory) > maxMemoryLength { + memory = memory[1:] + } + + // start goroutine to request and read responses from bedrock + wg.Add(1) + go func(startTime time.Time, inputText string, memory []types.Message) { + defer wg.Done() + slog.Info(fmt.Sprintf("getConverseStream for input text: [%s] memory: %v", inputText, memory), logTag) + + // Get result from ai + resp, err := p.bedrockLLM.getConverseStream(memory) + if err != nil { + slog.Error(fmt.Sprintf("getConverseStream for input text: [%s] failed, err: %v", inputText, err), logTag) + return + } + slog.Debug(fmt.Sprintf("getConverseStream start to recv for input text: [%s]", inputText), logTag) + + var sentence, fullContent string + var firstSentenceSent bool + for event := range resp.GetStream().Events() { + if startTime.UnixMicro() < outdateTs.Load() { // Check whether to interrupt + slog.Info(fmt.Sprintf("getConverseStream recv interrupt and flushing for input text: [%s], startTs: %d, outdateTs: %d", + inputText, startTime.UnixMicro(), outdateTs.Load()), logTag) + break + } + var content string + + switch v := event.(type) { + case *types.ConverseStreamOutputMemberContentBlockDelta: + // ignore non-text deltas, e.g. toolUse + if textResponse, ok := v.Value.Delta.(*types.ContentBlockDeltaMemberText); ok { + content = textResponse.Value + } + + case *types.UnknownUnionMember: + fmt.Println("unknown tag:", v.Tag) + } + + fullContent += content + + for { + // feed content and check whether sentence is available + var sentenceIsFinal bool + sentence, content, sentenceIsFinal = parseSentence(sentence, content) + if len(sentence) == 0 || !sentenceIsFinal { + slog.Debug(fmt.Sprintf("sentence %s is empty or not final", sentence), logTag) + break + } +
slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] got sentence: [%s]", inputText, sentence), logTag) + + // send sentence + outputData, err := rtego.NewData("text_data") + if err != nil { + slog.Error(fmt.Sprintf("NewData failed, err: %v", err), logTag) + break + } + outputData.SetProperty(dataOutTextDataPropertyText, sentence) + outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, false) + if err := rte.SendData(outputData); err != nil { + slog.Error(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] send sentence [%s] failed, err: %v", inputText, sentence, err), logTag) + break + } else { + slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] sent sentence [%s]", inputText, sentence), logTag) + } + sentence = "" + + if !firstSentenceSent { + firstSentenceSent = true + slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] first sentence sent, first_sentency_latency %dms", + inputText, time.Since(startTime).Milliseconds()), logTag) + } + } + } + + // remember response as assistant content in memory + memoryChan <- types.Message{ + Role: types.ConversationRoleAssistant, + Content: []types.ContentBlock{ + &types.ContentBlockMemberText{ + Value: fullContent, + }, + }, + } + + // send end of segment + outputData, _ := rtego.NewData("text_data") + outputData.SetProperty(dataOutTextDataPropertyText, sentence) + outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) + if err := rte.SendData(outputData); err != nil { + slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] send failed, err: %v", inputText, sentence, err), logTag) + } else { + slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] sent", inputText, sentence), logTag) + } + }(time.Now(), inputText, append([]types.Message{}, memory...)) +} + +func init() { + slog.Info("init") + + // Register addon + rtego.RegisterAddonAsExtension( + "bedrock_llm", + rtego.NewDefaultExtensionAddon(newBedrockLLMExtension), + ) +} diff --git a/server/main.go b/server/main.go index 20d1fff27..f7c6745f2 100644 --- a/server/main.go +++ b/server/main.go @@ -108,6 +108,26 @@ func processManifest(manifestJsonFile string) (err error) { manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="azure_tts").property.azure_subscription_region`, azureTtsRegion) } + awsRegion := os.Getenv("AWS_REGION") + if awsRegion != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.region`, awsRegion) + } + + awsAccessKey := os.Getenv("AWS_ACCESS_KEY_ID") + if awsAccessKey != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.access_key`, awsAccessKey) + } + + awsSecretKey := os.Getenv("AWS_SECRET_ACCESS_KEY") + if awsSecretKey != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.secret_key`, awsSecretKey) + } + + bedrockModel := os.Getenv("AWS_BEDROCK_MODEL") + if bedrockModel != "" { + manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="bedrock_llm").property.model`, bedrockModel) + } + elevenlabsTtsKey := os.Getenv("ELEVENLABS_TTS_KEY") if elevenlabsTtsKey != "" { manifestJson, _ = sjson.Set(manifestJson, `predefined_graphs.0.nodes.#(name=="elevenlabs_tts").property.api_key`, elevenlabsTtsKey) From 7f7da144fac69f4a6fea56a57d028a2a693654ba Mon Sep 17 00:00:00 2001 
From: xxxxl_sun <31622273+sunxilin@users.noreply.github.com> Date: Wed, 24 Jul 2024 22:51:25 +0800 Subject: [PATCH 53/72] reduce side effects of unused python extensions (#110) Co-authored-by: sunxilin --- .../chat_transcriber_python/__init__.py | 2 +- .../chat_transcriber_addon.py | 26 ++++ .../chat_transcriber_extension.py | 30 +--- agents/addon/extension/cosy_tts/__init__.py | 2 +- .../extension/cosy_tts/cosy_tts_addon.py | 26 ++++ .../{main.py => cosy_tts_extension.py} | 80 +++++------ .../elevenlabs_tts_python/__init__.py | 2 +- .../elevenlabs_tts_addon.py | 33 +++++ .../interrupt_detector_python/__init__.py | 2 +- .../interrupt_detector_addon.py | 34 +++++ .../interrupt_detector_extension.py | 27 +--- .../openai_chatgpt_python/__init__.py | 2 +- .../openai_chatgpt_addon.py | 33 +++++ .../extension/qwen_llm_python/__init__.py | 2 +- .../qwen_llm_python/qwen_llm_addon.py | 33 +++++ .../{main.py => qwen_llm_extension.py} | 130 +++++++++--------- 16 files changed, 306 insertions(+), 158 deletions(-) create mode 100644 agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py create mode 100644 agents/addon/extension/cosy_tts/cosy_tts_addon.py rename agents/addon/extension/cosy_tts/{main.py => cosy_tts_extension.py} (80%) create mode 100644 agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py create mode 100644 agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py create mode 100644 agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py create mode 100644 agents/addon/extension/qwen_llm_python/qwen_llm_addon.py rename agents/addon/extension/qwen_llm_python/{main.py => qwen_llm_extension.py} (67%)
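The mechanical change repeated across all six extensions below: the @register_addon_as_extension stub moves into a small *_addon.py that is imported eagerly, while the extension module itself, with its heavy SDK imports (dashscope, openai, boto3), is imported only inside on_create_instance. Importing the package therefore no longer drags in SDKs for extensions a graph never instantiates. The shape of the pattern (hypothetical my_extension names, remaining lifecycle callbacks trimmed):

    # my_extension_addon.py -- imported eagerly, holds only the registration stub
    from rte_runtime_python import Addon, register_addon_as_extension, Rte

    @register_addon_as_extension("my_extension")
    class MyExtensionAddon(Addon):
        def on_create_instance(self, rte: Rte, addon_name: str, context) -> None:
            from .my_extension import MyExtension  # heavy deps load only on first use
            rte.on_create_instance_done(MyExtension(addon_name), context)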
diff --git a/agents/addon/extension/chat_transcriber_python/__init__.py b/agents/addon/extension/chat_transcriber_python/__init__.py index f8689834c..3a570f49b 100644 --- a/agents/addon/extension/chat_transcriber_python/__init__.py +++ b/agents/addon/extension/chat_transcriber_python/__init__.py @@ -1,4 +1,4 @@ -from . import chat_transcriber_extension +from . import chat_transcriber_addon from .log import logger logger.info("chat_transcriber_python extension loaded")
diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py new file mode 100644 index 000000000..8844c12e5 --- /dev/null +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py @@ -0,0 +1,26 @@ +from rte_runtime_python import ( + Addon, + register_addon_as_extension, + Rte, +) +from .log import logger + + +@register_addon_as_extension("chat_transcriber_python") +class ChatTranscriberExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + + from .chat_transcriber_extension import ChatTranscriberExtension + + rte.on_create_instance_done(ChatTranscriberExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("on_deinit") + rte.on_deinit_done() + return
diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py index eae3d1b49..f30e026b0 100644 --- a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py @@ -8,9 +8,7 @@ import json from rte_runtime_python import ( - Addon, Extension, - register_addon_as_extension, Rte, Cmd, Data, @@ -131,11 +129,13 @@ def on_data(self, rte: Rte, data: Data) -> None: ) try: - text = json.dumps({ - "uid": stream_id, - "text" :text, - "is_final": end_of_segment, - }) + text = json.dumps( + { + "uid": stream_id, + "text": text, + "is_final": end_of_segment, + } + ) text_buf = text.encode("utf-8") except Exception as e: logger.warning(f"on_data SerializeToString error: {e}") @@ -154,19 +154,3 @@ def on_data(self, rte: Rte, data: Data) -> None: except Exception as e: logger.warning(f"on_data new_data error: {e}") return - -@register_addon_as_extension("chat_transcriber_python") -class ChatTranscriberExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: - logger.info("on_init") - rte.on_init_done(manifest, property) - return - - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: - logger.info("on_create_instance") - rte.on_create_instance_done(ChatTranscriberExtension(addon_name), context) - - def on_deinit(self, rte: Rte) -> None: - logger.info("on_deinit") - rte.on_deinit_done() - return
diff --git a/agents/addon/extension/cosy_tts/__init__.py b/agents/addon/extension/cosy_tts/__init__.py index 4fece02f2..d7a1c8ec4 100644 --- a/agents/addon/extension/cosy_tts/__init__.py +++ b/agents/addon/extension/cosy_tts/__init__.py @@ -1,3 +1,3 @@ -from . import main +from . 
import cosy_tts_addon print("cosy_tts extension loaded") diff --git a/agents/addon/extension/cosy_tts/cosy_tts_addon.py b/agents/addon/extension/cosy_tts/cosy_tts_addon.py new file mode 100644 index 000000000..0cdc96488 --- /dev/null +++ b/agents/addon/extension/cosy_tts/cosy_tts_addon.py @@ -0,0 +1,26 @@ +from rte_runtime_python import ( + Addon, + register_addon_as_extension, + Rte, +) +from .log import logger + + +@register_addon_as_extension("cosy_tts") +class CosyTTSExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("CosyTTSExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + + from .cosy_tts_extension import CosyTTSExtension + + rte.on_create_instance_done(CosyTTSExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("CosyTTSExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/cosy_tts/main.py b/agents/addon/extension/cosy_tts/cosy_tts_extension.py similarity index 80% rename from agents/addon/extension/cosy_tts/main.py rename to agents/addon/extension/cosy_tts/cosy_tts_extension.py index d068b4f27..868d62014 100644 --- a/agents/addon/extension/cosy_tts/main.py +++ b/agents/addon/extension/cosy_tts/cosy_tts_extension.py @@ -7,9 +7,7 @@ # import traceback from rte_runtime_python import ( - Addon, Extension, - register_addon_as_extension, Rte, Cmd, PcmFrame, @@ -18,7 +16,6 @@ StatusCode, CmdResult, MetadataInfo, - RTE_PIXEL_FMT, ) from typing import List, Any import dashscope @@ -28,6 +25,7 @@ from dashscope.audio.tts_v2 import ResultCallback, SpeechSynthesizer, AudioFormat from .log import logger + class CosyTTSCallback(ResultCallback): _player = None _stream = None @@ -68,38 +66,39 @@ def get_frame(self, data: bytes) -> PcmFrame: f.alloc_buf(self.frame_size) buff = f.lock_buf() if len(data) < self.frame_size: - buff[:] = bytes(self.frame_size) #fill with 0 - buff[:len(data)] = data + buff[:] = bytes(self.frame_size) # fill with 0 + buff[: len(data)] = data f.unlock_buf(buff) return f - + def cancel(self) -> None: self.canceled = True def on_data(self, data: bytes) -> None: if self.canceled: return - - #logger.info("audio result length: %d, %d", len(data), self.frame_size) + + # logger.info("audio result length: %d, %d", len(data), self.frame_size) try: chunk = int(len(data) / self.frame_size) offset = 0 for i in range(0, chunk): if self.canceled: return - f = self.get_frame(data[offset:offset + self.frame_size]) + f = self.get_frame(data[offset : offset + self.frame_size]) self.rte.send_pcm_frame(f) offset += self.frame_size - + if self.canceled: return if offset < len(data): size = len(data) - offset - f = self.get_frame(data[offset:offset+size]) + f = self.get_frame(data[offset : offset + size]) self.rte.send_pcm_frame(f) except Exception as e: logger.exception(e) + class CosyTTSExtension(Extension): def __init__(self, name: str): super().__init__(name) @@ -111,15 +110,13 @@ def __init__(self, name: str): self.callback = None self.format = None self.outdateTs = datetime.now() - + self.stopped = False self.thread = None self.queue = queue.Queue() self.mutex = threading.Lock() - def on_init( - self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo - ) -> None: + def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: logger.info("CosyTTSExtension on_init") rte.on_init_done(manifest, property) @@ 
-133,20 +130,20 @@ def on_start(self, rte: Rte) -> None: dashscope.api_key = self.api_key f = AudioFormat.PCM_16000HZ_MONO_16BIT if self.sample_rate == 8000: - f = AudioFormat.PCM_8000HZ_MONO_16BIT + f = AudioFormat.PCM_8000HZ_MONO_16BIT elif self.sample_rate == 16000: f = AudioFormat.PCM_16000HZ_MONO_16BIT elif self.sample_rate == 22050: - f = AudioFormat.PCM_22050HZ_MONO_16BIT + f = AudioFormat.PCM_22050HZ_MONO_16BIT elif self.sample_rate == 24000: - f = AudioFormat.PCM_24000HZ_MONO_16BIT + f = AudioFormat.PCM_24000HZ_MONO_16BIT elif self.sample_rate == 44100: - f = AudioFormat.PCM_44100HZ_MONO_16BIT + f = AudioFormat.PCM_44100HZ_MONO_16BIT elif self.sample_rate == 48000: - f = AudioFormat.PCM_48000HZ_MONO_16BIT + f = AudioFormat.PCM_48000HZ_MONO_16BIT else: - logger.info("unknown sample rate %d", self.sample_rate) - exit() + logger.info("unknown sample rate %d", self.sample_rate) + exit() self.format = f @@ -169,7 +166,7 @@ def on_deinit(self, rte: Rte) -> None: def need_interrupt(self, ts: datetime.time) -> bool: return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1 - + def async_handle(self, rte: Rte): try: tts = None @@ -195,15 +192,20 @@ def async_handle(self, rte: Rte): if self.need_interrupt(ts): continue - + if callback is not None and callback.closed is True: tts = None if tts is None: logger.info("creating tts") callback = CosyTTSCallback(rte, self.sample_rate) - tts = SpeechSynthesizer(model=self.model, voice=self.voice, format=self.format, callback=callback) - + tts = SpeechSynthesizer( + model=self.model, + voice=self.voice, + format=self.format, + callback=callback, + ) + logger.info("on message %s", inputText) tts.streaming_call(inputText) except Exception as e: @@ -212,7 +214,7 @@ def async_handle(self, rte: Rte): finally: if tts is not None: tts.streaming_complete() - + def flush(self): logger.info("CosyTTSExtension flush") while not self.queue.empty(): @@ -225,9 +227,9 @@ def on_data(self, rte: Rte, data: Data) -> None: if len(inputText) == 0: logger.info("ignore empty text") return - + is_end = data.get_property_bool("end_of_segment") - + logger.info("on data %s %d", inputText, is_end) self.queue.put((inputText, datetime.now())) @@ -241,26 +243,12 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: self.outdateTs = datetime.now() self.flush() cmd_out = Cmd.create("flush") - rte.send_cmd(cmd_out, lambda rte, result: print("DefaultExtension send_cmd done")) + rte.send_cmd( + cmd_out, lambda rte, result: print("DefaultExtension send_cmd done") + ) else: logger.info("unknown cmd %s", cmdName) cmd_result = CmdResult.create(StatusCode.OK) cmd_result.set_property_string("detail", "success") rte.return_result(cmd_result, cmd) - -@register_addon_as_extension("cosy_tts") -class CosyTTSExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: - logger.info("CosyTTSExtensionAddon on_init") - rte.on_init_done(manifest, property) - return - - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: - logger.info("on_create_instance") - rte.on_create_instance_done(CosyTTSExtension(addon_name), context) - - def on_deinit(self, rte: Rte) -> None: - logger.info("CosyTTSExtensionAddon on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/elevenlabs_tts_python/__init__.py b/agents/addon/extension/elevenlabs_tts_python/__init__.py index f80e230ad..8cf7e25f7 100644 --- a/agents/addon/extension/elevenlabs_tts_python/__init__.py +++ b/agents/addon/extension/elevenlabs_tts_python/__init__.py @@ -1,4 +1,4 @@ 
-from . import elevenlabs_tts_extension +from . import elevenlabs_tts_addon from .log import logger diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py new file mode 100644 index 000000000..2b6a93efb --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py @@ -0,0 +1,33 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# + +from rte_runtime_python import ( + Addon, + register_addon_as_extension, + Rte, +) +from .log import logger + + +@register_addon_as_extension("elevenlabs_tts_python") +class ElevenlabsTTSExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + from .elevenlabs_tts_extension import ElevenlabsTTSExtension + + rte.on_create_instance_done(ElevenlabsTTSExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/interrupt_detector_python/__init__.py b/agents/addon/extension/interrupt_detector_python/__init__.py index a3ec245c3..8692cc027 100644 --- a/agents/addon/extension/interrupt_detector_python/__init__.py +++ b/agents/addon/extension/interrupt_detector_python/__init__.py @@ -1,4 +1,4 @@ -from . import interrupt_detector_extension +from . import interrupt_detector_addon from .log import logger logger.info("interrupt_detector_python extension loaded") diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py new file mode 100644 index 000000000..accab6920 --- /dev/null +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py @@ -0,0 +1,34 @@ +# +# +# Agora Real Time Engagement +# Created by XinHui Li in 2024-07. +# Copyright (c) 2024 Agora IO. All rights reserved. 
+# +# + +from rte_runtime_python import ( + Addon, + register_addon_as_extension, + Rte, +) +from .log import logger + + +@register_addon_as_extension("interrupt_detector_python") +class InterruptDetectorExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + + from .interrupt_detector_extension import InterruptDetectorExtension + + rte.on_create_instance_done(InterruptDetectorExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py index a970ce563..dcc220f35 100644 --- a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py @@ -7,9 +7,7 @@ # from rte_runtime_python import ( - Addon, Extension, - register_addon_as_extension, Rte, Cmd, Data, @@ -84,27 +82,16 @@ def on_data(self, rte: Rte, data: Data) -> None: if final or len(text) >= 2: flush_cmd = Cmd.create(CMD_NAME_FLUSH) - rte.send_cmd(flush_cmd, lambda rte, result: print("InterruptDetectorExtensionAddon send_cmd done")) + rte.send_cmd( + flush_cmd, + lambda rte, result: print( + "InterruptDetectorExtensionAddon send_cmd done" + ), + ) logger.info(f"sent cmd: {CMD_NAME_FLUSH}") - + d = Data.create("text_data") d.set_property_bool(TEXT_DATA_FINAL_FIELD, final) d.set_property_string(TEXT_DATA_TEXT_FIELD, text) rte.send_data(d) - -@register_addon_as_extension("interrupt_detector_python") -class InterruptDetectorExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: - logger.info("on_init") - rte.on_init_done(manifest, property) - return - - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: - logger.info("on_create_instance") - rte.on_create_instance_done(InterruptDetectorExtension(addon_name), context) - - def on_deinit(self, rte: Rte) -> None: - logger.info("on_deinit") - rte.on_deinit_done() - return diff --git a/agents/addon/extension/openai_chatgpt_python/__init__.py b/agents/addon/extension/openai_chatgpt_python/__init__.py index 0b89ea8a7..42c4cd124 100644 --- a/agents/addon/extension/openai_chatgpt_python/__init__.py +++ b/agents/addon/extension/openai_chatgpt_python/__init__.py @@ -1,4 +1,4 @@ -from . import openai_chatgpt_extension +from . import openai_chatgpt_addon from .log import logger logger.info("openai_chatgpt_python extension loaded") diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py new file mode 100644 index 000000000..7f2c039a2 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py @@ -0,0 +1,33 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. 
+# +# +from rte_runtime_python import ( + Addon, + register_addon_as_extension, + Rte, +) +from .log import logger + + +@register_addon_as_extension("openai_chatgpt_python") +class OpenAIChatGPTExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("OpenAIChatGPTExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + logger.info("on_create_instance") + + from .openai_chatgpt_extension import OpenAIChatGPTExtension + + rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("OpenAIChatGPTExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/qwen_llm_python/__init__.py b/agents/addon/extension/qwen_llm_python/__init__.py index d3832c27a..b5b5c7c25 100644 --- a/agents/addon/extension/qwen_llm_python/__init__.py +++ b/agents/addon/extension/qwen_llm_python/__init__.py @@ -1,3 +1,3 @@ -from . import main +from . import qwen_llm_addon print("qwen_llm_python extension loaded") diff --git a/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py b/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py new file mode 100644 index 000000000..ee63185f9 --- /dev/null +++ b/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py @@ -0,0 +1,33 @@ +# +# +# Agora Real Time Engagement +# Created by Wei Hu in 2024-05. +# Copyright (c) 2024 Agora IO. All rights reserved. +# +# +from rte_runtime_python import ( + Addon, + register_addon_as_extension, + Rte, +) +from .log import logger + + +@register_addon_as_extension("qwen_llm_python") +class QWenLLMExtensionAddon(Addon): + def on_init(self, rte: Rte, manifest, property) -> None: + logger.info("QWenLLMExtensionAddon on_init") + rte.on_init_done(manifest, property) + return + + def on_create_instance(self, rte: Rte, addon_name: str, context): + logger.info("on_create_instance") + + from .qwen_llm_extension import QWenLLMExtension + + rte.on_create_instance_done(QWenLLMExtension(addon_name), context) + + def on_deinit(self, rte: Rte) -> None: + logger.info("QWenLLMExtensionAddon on_deinit") + rte.on_deinit_done() + return diff --git a/agents/addon/extension/qwen_llm_python/main.py b/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py similarity index 67% rename from agents/addon/extension/qwen_llm_python/main.py rename to agents/addon/extension/qwen_llm_python/qwen_llm_extension.py index e7c8da2c6..90ce144ce 100644 --- a/agents/addon/extension/qwen_llm_python/main.py +++ b/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py @@ -6,16 +6,13 @@ # # from rte_runtime_python import ( - Addon, Extension, - register_addon_as_extension, Rte, Cmd, Data, StatusCode, CmdResult, MetadataInfo, - RTE_PIXEL_FMT, ) from rte_runtime_python.image_frame import ImageFrame from typing import List, Any @@ -26,12 +23,20 @@ from http import HTTPStatus from .log import logger + def isEnd(content: str) -> bool: - last = content[len(content)-1] - return last == ',' or last == ',' or \ - last == '.' or last == '。' or \ - last == '?' or last == '?' or \ - last == '!' or last == '!' + last = content[len(content) - 1] + return ( + last == "," + or last == "," + or last == "." + or last == "。" + or last == "?" + or last == "?" + or last == "!" + or last == "!" 
+ ) + class QWenLLMExtension(Extension): def __init__(self, name: str): @@ -52,7 +57,7 @@ def __init__(self, name: str): def on_msg(self, role: str, content: str) -> None: self.mutex.acquire() try: - self.history.append({'role': role, 'content': content}) + self.history.append({"role": role, "content": content}) if len(self.history) > self.max_history: self.history = self.history[1:] finally: @@ -61,7 +66,7 @@ def on_msg(self, role: str, content: str) -> None: def get_messages(self) -> List[Any]: messages = [] if len(self.prompt) > 0: - messages.append({'role': 'system', 'content': self.prompt}) + messages.append({"role": "system", "content": self.prompt}) self.mutex.acquire() try: for h in self.history: @@ -72,48 +77,57 @@ def get_messages(self) -> List[Any]: def need_interrupt(self, ts: datetime.time) -> bool: return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1 - + def call(self, messages: List[Any]): logger.info("before call %s", messages) - response = dashscope.Generation.call("qwen-max", - messages=messages, - result_format='message', # set the result to be "message" format. - stream=False, # set streaming output - incremental_output=False # get streaming output incrementally - ) + response = dashscope.Generation.call( + "qwen-max", + messages=messages, + result_format="message", # set the result to be "message" format. + stream=False, # set streaming output + incremental_output=False, # get streaming output incrementally + ) if response.status_code == HTTPStatus.OK: - self.on_msg(response.output.choices[0]['message']['role'], response.output.choices[0]['message']['content']) - logger.info("on response %s", response.output.choices[0]['message']['content']) + self.on_msg( + response.output.choices[0]["message"]["role"], + response.output.choices[0]["message"]["content"], + ) + logger.info( + "on response %s", response.output.choices[0]["message"]["content"] + ) else: logger.info("Failed to get response %s", response) - - def call_with_stream(self, rte: Rte, ts :datetime.time, inputText: str, messages: List[Any]): + + def call_with_stream( + self, rte: Rte, ts: datetime.time, inputText: str, messages: List[Any] + ): if self.need_interrupt(ts): logger.warning("out of date, %s, %s", self.outdateTs, ts) return if len(self.ongoing) > 0: - messages.append({'role':'assistant', 'content':self.ongoing}) - messages.append({'role':'user', 'content':inputText}) + messages.append({"role": "assistant", "content": self.ongoing}) + messages.append({"role": "user", "content": inputText}) logger.info("before call %s %s", messages, ts) - responses = dashscope.Generation.call(self.model, - messages=messages, - result_format='message', # set the result to be "message" format. - stream=True, # set streaming output - incremental_output=True # get streaming output incrementally - ) + responses = dashscope.Generation.call( + self.model, + messages=messages, + result_format="message", # set the result to be "message" format. 
+ stream=True, # set streaming output + incremental_output=True, # get streaming output incrementally + ) total = "" partial = "" for response in responses: if self.need_interrupt(ts): if len(self.ongoing) > 0: - self.on_msg('user', inputText) - self.on_msg('assistant', self.ongoing) - self.ongoing = '' + self.on_msg("user", inputText) + self.on_msg("assistant", self.ongoing) + self.ongoing = "" logger.warning("out of date, %s, %s", self.outdateTs, ts) return if response.status_code == HTTPStatus.OK: - temp = response.output.choices[0]['message']['content'] + temp = response.output.choices[0]["message"]["content"] if len(temp) == 0: continue partial += temp @@ -126,10 +140,15 @@ def call_with_stream(self, rte: Rte, ts :datetime.time, inputText: str, messages total += partial partial = "" else: - logger.info('Request id: %s, Status code: %s, error code: %s, error message: %s' % ( - response.request_id, response.status_code, - response.code, response.message - )) + logger.info( + "Request id: %s, Status code: %s, error code: %s, error message: %s" + % ( + response.request_id, + response.status_code, + response.code, + response.message, + ) + ) return if len(partial) > 0: d = Data.create("text_data") @@ -142,10 +161,8 @@ def call_with_stream(self, rte: Rte, ts :datetime.time, inputText: str, messages self.on_msg("user", inputText) self.on_msg("assistant", total) logger.info("on response %s", total) - - def on_init( - self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo - ) -> None: + + def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: logger.info("QWenLLMExtension on_init") rte.on_init_done(manifest, property) @@ -184,17 +201,17 @@ def on_data(self, rte: Rte, data: Data) -> None: if not is_final: logger.info("ignore non final") return - + inputText = data.get_property_string("text") if len(inputText) == 0: logger.info("ignore empty text") return - + ts = datetime.now() - + logger.info("on data %s, %s", inputText, ts) self.queue.put((inputText, ts)) - + def async_handle(self, rte: Rte): while not self.stopped: try: @@ -218,9 +235,12 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmdName = cmd.get_name() if cmdName == "flush": self.outdateTs = datetime.now() - #self.flush() + # self.flush() cmd_out = Cmd.create("flush") - rte.send_cmd(cmd_out, lambda rte, result: print("QWenLLMExtensionAddon send_cmd done")) + rte.send_cmd( + cmd_out, + lambda rte, result: print("QWenLLMExtensionAddon send_cmd done"), + ) else: logger.info("unknown cmd %s", cmdName) @@ -229,19 +249,3 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: logger.info("QWenLLMExtension on_cmd") - -@register_addon_as_extension("qwen_llm_python") -class QWenLLMExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: - logger.info("QWenLLMExtensionAddon on_init") - rte.on_init_done(manifest, property) - return - - def on_create_instance(self, rte: Rte, addon_name: str, context) -> Extension: - logger.info("on_create_instance") - rte.on_create_instance_done(QWenLLMExtension(addon_name), context) - - def on_deinit(self, rte: Rte) -> None: - logger.info("QWenLLMExtensionAddon on_deinit") - rte.on_deinit_done() - return From 9f2e6a7b2dbf5596e2c1bddb5306813668188b42 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 15:00:15 +0000 Subject: [PATCH 54/72] Revert "feat: remove golang" This reverts commit af777ffcecc7c1e1c54c9130508fdd5671050f26. 
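Note: the Python LLM extension above and the Go elevenlabs_tts extension restored below share one interruption scheme: every queued request is stamped with its receive time, a "flush" command bumps a global cutoff timestamp, and the worker drops any message stamped before the cutoff. A minimal, self-contained Go sketch of just that pattern follows (names loosely follow the elevenlabs_tts extension; the flush/process/main helpers are illustrative and not part of either extension):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// message mirrors the shape used by both extensions: the request text
// plus the timestamp at which it was received.
type message struct {
	text       string
	receivedTs int64
}

// outdateTs is the global cutoff; a "flush" bumps it to now, which
// invalidates every message received earlier.
var outdateTs atomic.Int64

func flush() {
	outdateTs.Store(time.Now().UnixMicro())
}

func process(msg *message) {
	if msg.receivedTs < outdateTs.Load() {
		// Stale: a flush arrived after this message was queued.
		fmt.Printf("dropped: %q\n", msg.text)
		return
	}
	fmt.Printf("processing: %q\n", msg.text)
}

func main() {
	m := &message{text: "hello", receivedTs: time.Now().UnixMicro()}
	time.Sleep(time.Millisecond) // ensure the flush gets a later timestamp
	flush()                      // user interrupted before the worker ran
	process(m)                   // dropped: the flush postdates the message
}

Keeping the cutoff in an atomic.Int64 makes the staleness check lock-free, which matters because the TTS worker goroutine polls it once per queued message and again inside the PCM read loop.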
--- .../extension/chat_transcriber/extension.go | 147 ++++++ .../addon/extension/chat_transcriber/go.mod | 10 + .../addon/extension/chat_transcriber/go.sum | 4 + .../extension/chat_transcriber/manifest.json | 45 ++ .../chat_transcriber/pb/chat_text.pb.go | 475 ++++++++++++++++++ .../chat_transcriber/pb/chat_text.proto | 37 ++ .../extension/chat_transcriber/property.json | 1 + .../elevenlabs_tts/elevenlabs_tts.go | 82 +++ .../elevenlabs_tts_extension.go | 340 +++++++++++++ agents/addon/extension/elevenlabs_tts/go.mod | 10 + agents/addon/extension/elevenlabs_tts/go.sum | 2 + .../extension/elevenlabs_tts/manifest.json | 74 +++ agents/addon/extension/elevenlabs_tts/pcm.go | 104 ++++ .../extension/elevenlabs_tts/property.json | 1 + .../extension/interrupt_detector/extension.go | 78 +++ .../addon/extension/interrupt_detector/go.mod | 7 + .../interrupt_detector/manifest.json | 38 ++ .../interrupt_detector/property.json | 1 + .../addon/extension/openai_chatgpt/README.md | 0 agents/addon/extension/openai_chatgpt/go.mod | 17 + agents/addon/extension/openai_chatgpt/go.sum | 12 + .../extension/openai_chatgpt/manifest.json | 83 +++ .../openai_chatgpt/openai_chatgpt.go | 111 ++++ .../openai_chatgpt_extension.go | 391 ++++++++++++++ .../extension/openai_chatgpt/property.json | 1 + .../extension/openai_chatgpt/sentence.go | 30 ++ .../extension/openai_chatgpt/sentence_test.go | 150 ++++++ 27 files changed, 2251 insertions(+) create mode 100644 agents/addon/extension/chat_transcriber/extension.go create mode 100644 agents/addon/extension/chat_transcriber/go.mod create mode 100644 agents/addon/extension/chat_transcriber/go.sum create mode 100644 agents/addon/extension/chat_transcriber/manifest.json create mode 100644 agents/addon/extension/chat_transcriber/pb/chat_text.pb.go create mode 100644 agents/addon/extension/chat_transcriber/pb/chat_text.proto create mode 100644 agents/addon/extension/chat_transcriber/property.json create mode 100644 agents/addon/extension/elevenlabs_tts/elevenlabs_tts.go create mode 100644 agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go create mode 100644 agents/addon/extension/elevenlabs_tts/go.mod create mode 100644 agents/addon/extension/elevenlabs_tts/go.sum create mode 100644 agents/addon/extension/elevenlabs_tts/manifest.json create mode 100644 agents/addon/extension/elevenlabs_tts/pcm.go create mode 100644 agents/addon/extension/elevenlabs_tts/property.json create mode 100644 agents/addon/extension/interrupt_detector/extension.go create mode 100644 agents/addon/extension/interrupt_detector/go.mod create mode 100644 agents/addon/extension/interrupt_detector/manifest.json create mode 100644 agents/addon/extension/interrupt_detector/property.json create mode 100644 agents/addon/extension/openai_chatgpt/README.md create mode 100644 agents/addon/extension/openai_chatgpt/go.mod create mode 100644 agents/addon/extension/openai_chatgpt/go.sum create mode 100644 agents/addon/extension/openai_chatgpt/manifest.json create mode 100644 agents/addon/extension/openai_chatgpt/openai_chatgpt.go create mode 100644 agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go create mode 100644 agents/addon/extension/openai_chatgpt/property.json create mode 100644 agents/addon/extension/openai_chatgpt/sentence.go create mode 100644 agents/addon/extension/openai_chatgpt/sentence_test.go diff --git a/agents/addon/extension/chat_transcriber/extension.go b/agents/addon/extension/chat_transcriber/extension.go new file mode 100644 index 000000000..33fba48ab --- /dev/null +++ 
b/agents/addon/extension/chat_transcriber/extension.go @@ -0,0 +1,147 @@ +/** + * + * Agora Real Time Engagement + * Created by Wei Hu in 2022-10. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +package extension + +import ( + "chat_transcriber/pb" + "fmt" + "log/slog" + "time" + + "agora.io/rte/rtego" + "google.golang.org/protobuf/proto" +) + +const ( + textDataTextField = "text" + textDataFinalField = "is_final" + textDataStreamIdField = "stream_id" + textDataEndOfSegmentField = "end_of_segment" +) + +var ( + logTag = slog.String("extension", "CHAT_TRANSCRIBER_EXTENSION") +) + +type chatTranscriberExtension struct { + rtego.DefaultExtension + + cachedTextMap map[uint32]string // record the cached text data for each stream id +} + +func newExtension(name string) rtego.Extension { + return &chatTranscriberExtension{ + cachedTextMap: make(map[uint32]string), + } +} + +// OnData receives data from rte graph. +// current supported data: +// - name: text_data +// example: +// {"name": "text_data", "properties": {"text": "hello", "is_final": true, "stream_id": 123, "end_of_segment": true}} +func (p *chatTranscriberExtension) OnData( + rte rtego.Rte, + data rtego.Data, +) { + // Get the text data from data. + text, err := data.GetPropertyString(textDataTextField) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataTextField, err), logTag) + return + } + + // Get the 'is_final' flag from data which indicates whether the text is final, + // otherwise it could be overwritten by the next text. + final, err := data.GetPropertyBool(textDataFinalField) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataFinalField, err), logTag) + return + } + + // Get the stream id from data. + streamId, err := data.GetPropertyUint32(textDataStreamIdField) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataStreamIdField, err), logTag) + return + } + + // Get the 'end_of_segment' flag from data which indicates whether a line break is needed. + endOfSegment, err := data.GetPropertyBool(textDataEndOfSegmentField) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataEndOfSegmentField, err), logTag) + return + } + + slog.Debug(fmt.Sprintf( + "OnData %s: %s %s: %t %s: %d %s: %t", + textDataTextField, + text, + textDataFinalField, + final, + textDataStreamIdField, + streamId, + textDataEndOfSegmentField, + endOfSegment), logTag) + + // We cache all final text data and append the non-final text data to the cached data + // until the end of the segment. + if endOfSegment { + if cachedText, ok := p.cachedTextMap[streamId]; ok { + text = cachedText + text + delete(p.cachedTextMap, streamId) + } + } else { + if final { + if cachedText, ok := p.cachedTextMap[streamId]; ok { + text = cachedText + text + p.cachedTextMap[streamId] = text + } else { + p.cachedTextMap[streamId] = text + } + } + } + + pb := pb.Text{ + Uid: int32(streamId), + DataType: "transcribe", + Texttime: time.Now().UnixMilli(), + Words: []*pb.Word{ + { + Text: text, + IsFinal: endOfSegment, + }, + }, + } + + pbData, err := proto.Marshal(&pb) + if err != nil { + slog.Warn(fmt.Sprintf("OnData Marshal error: %v", err), logTag) + return + } + + // convert the origin text data to the protobuf data and send it to the graph. 
+ rteData, err := rtego.NewData("data") + rteData.SetPropertyBytes("data", pbData) + if err != nil { + slog.Warn(fmt.Sprintf("OnData NewData error: %v", err), logTag) + return + } + + rte.SendData(rteData) +} + +func init() { + slog.Info("chat_transcriber extension init", logTag) + + // Register addon + rtego.RegisterAddonAsExtension( + "chat_transcriber", + rtego.NewDefaultExtensionAddon(newExtension), + ) +} diff --git a/agents/addon/extension/chat_transcriber/go.mod b/agents/addon/extension/chat_transcriber/go.mod new file mode 100644 index 000000000..311ff8aae --- /dev/null +++ b/agents/addon/extension/chat_transcriber/go.mod @@ -0,0 +1,10 @@ +module chat_transcriber + +go 1.18 + +replace agora.io/rte => ../../../interface + +require ( + agora.io/rte v0.0.0-00010101000000-000000000000 + google.golang.org/protobuf v1.34.2 +) diff --git a/agents/addon/extension/chat_transcriber/go.sum b/agents/addon/extension/chat_transcriber/go.sum new file mode 100644 index 000000000..73d32b16b --- /dev/null +++ b/agents/addon/extension/chat_transcriber/go.sum @@ -0,0 +1,4 @@ +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= diff --git a/agents/addon/extension/chat_transcriber/manifest.json b/agents/addon/extension/chat_transcriber/manifest.json new file mode 100644 index 000000000..269a5a534 --- /dev/null +++ b/agents/addon/extension/chat_transcriber/manifest.json @@ -0,0 +1,45 @@ +{ + "type": "extension", + "name": "chat_transcriber", + "version": "0.1.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime", + "version": "0.1.0" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.1.0" + } + ], + "api": { + "property": {}, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + }, + "stream_id": { + "type": "uint32" + }, + "end_of_segment": { + "type": "bool" + } + } + } + ], + "data_out": [ + { + "name": "data" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber/pb/chat_text.pb.go b/agents/addon/extension/chat_transcriber/pb/chat_text.pb.go new file mode 100644 index 000000000..034473fd0 --- /dev/null +++ b/agents/addon/extension/chat_transcriber/pb/chat_text.pb.go @@ -0,0 +1,475 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.33.0 +// protoc (unknown) +// source: chat_text.proto + +package pb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type Text struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Vendor int32 `protobuf:"varint,1,opt,name=vendor,proto3" json:"vendor,omitempty"` + Version int32 `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"` + Seqnum int32 `protobuf:"varint,3,opt,name=seqnum,proto3" json:"seqnum,omitempty"` + Uid int32 `protobuf:"varint,4,opt,name=uid,proto3" json:"uid,omitempty"` + Flag int32 `protobuf:"varint,5,opt,name=flag,proto3" json:"flag,omitempty"` + Time int64 `protobuf:"varint,6,opt,name=time,proto3" json:"time,omitempty"` // final time =first nofinal time + Lang int32 `protobuf:"varint,7,opt,name=lang,proto3" json:"lang,omitempty"` + Starttime int32 `protobuf:"varint,8,opt,name=starttime,proto3" json:"starttime,omitempty"` + Offtime int32 `protobuf:"varint,9,opt,name=offtime,proto3" json:"offtime,omitempty"` + Words []*Word `protobuf:"bytes,10,rep,name=words,proto3" json:"words,omitempty"` + EndOfSegment bool `protobuf:"varint,11,opt,name=end_of_segment,json=endOfSegment,proto3" json:"end_of_segment,omitempty"` + DurationMs int32 `protobuf:"varint,12,opt,name=duration_ms,json=durationMs,proto3" json:"duration_ms,omitempty"` + DataType string `protobuf:"bytes,13,opt,name=data_type,json=dataType,proto3" json:"data_type,omitempty"` // transcribe ,translate + Trans []*Translation `protobuf:"bytes,14,rep,name=trans,proto3" json:"trans,omitempty"` + Culture string `protobuf:"bytes,15,opt,name=culture,proto3" json:"culture,omitempty"` + Texttime int64 `protobuf:"varint,16,opt,name=texttime,proto3" json:"texttime,omitempty"` // pkg timestamp +} + +func (x *Text) Reset() { + *x = Text{} + if protoimpl.UnsafeEnabled { + mi := &file_chat_text_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Text) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Text) ProtoMessage() {} + +func (x *Text) ProtoReflect() protoreflect.Message { + mi := &file_chat_text_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Text.ProtoReflect.Descriptor instead. 
+func (*Text) Descriptor() ([]byte, []int) { + return file_chat_text_proto_rawDescGZIP(), []int{0} +} + +func (x *Text) GetVendor() int32 { + if x != nil { + return x.Vendor + } + return 0 +} + +func (x *Text) GetVersion() int32 { + if x != nil { + return x.Version + } + return 0 +} + +func (x *Text) GetSeqnum() int32 { + if x != nil { + return x.Seqnum + } + return 0 +} + +func (x *Text) GetUid() int32 { + if x != nil { + return x.Uid + } + return 0 +} + +func (x *Text) GetFlag() int32 { + if x != nil { + return x.Flag + } + return 0 +} + +func (x *Text) GetTime() int64 { + if x != nil { + return x.Time + } + return 0 +} + +func (x *Text) GetLang() int32 { + if x != nil { + return x.Lang + } + return 0 +} + +func (x *Text) GetStarttime() int32 { + if x != nil { + return x.Starttime + } + return 0 +} + +func (x *Text) GetOfftime() int32 { + if x != nil { + return x.Offtime + } + return 0 +} + +func (x *Text) GetWords() []*Word { + if x != nil { + return x.Words + } + return nil +} + +func (x *Text) GetEndOfSegment() bool { + if x != nil { + return x.EndOfSegment + } + return false +} + +func (x *Text) GetDurationMs() int32 { + if x != nil { + return x.DurationMs + } + return 0 +} + +func (x *Text) GetDataType() string { + if x != nil { + return x.DataType + } + return "" +} + +func (x *Text) GetTrans() []*Translation { + if x != nil { + return x.Trans + } + return nil +} + +func (x *Text) GetCulture() string { + if x != nil { + return x.Culture + } + return "" +} + +func (x *Text) GetTexttime() int64 { + if x != nil { + return x.Texttime + } + return 0 +} + +type Word struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Text string `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"` + StartMs int32 `protobuf:"varint,2,opt,name=start_ms,json=startMs,proto3" json:"start_ms,omitempty"` + DurationMs int32 `protobuf:"varint,3,opt,name=duration_ms,json=durationMs,proto3" json:"duration_ms,omitempty"` + IsFinal bool `protobuf:"varint,4,opt,name=is_final,json=isFinal,proto3" json:"is_final,omitempty"` + Confidence float64 `protobuf:"fixed64,5,opt,name=confidence,proto3" json:"confidence,omitempty"` +} + +func (x *Word) Reset() { + *x = Word{} + if protoimpl.UnsafeEnabled { + mi := &file_chat_text_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Word) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Word) ProtoMessage() {} + +func (x *Word) ProtoReflect() protoreflect.Message { + mi := &file_chat_text_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Word.ProtoReflect.Descriptor instead. 
+func (*Word) Descriptor() ([]byte, []int) { + return file_chat_text_proto_rawDescGZIP(), []int{1} +} + +func (x *Word) GetText() string { + if x != nil { + return x.Text + } + return "" +} + +func (x *Word) GetStartMs() int32 { + if x != nil { + return x.StartMs + } + return 0 +} + +func (x *Word) GetDurationMs() int32 { + if x != nil { + return x.DurationMs + } + return 0 +} + +func (x *Word) GetIsFinal() bool { + if x != nil { + return x.IsFinal + } + return false +} + +func (x *Word) GetConfidence() float64 { + if x != nil { + return x.Confidence + } + return 0 +} + +type Translation struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + IsFinal bool `protobuf:"varint,1,opt,name=is_final,json=isFinal,proto3" json:"is_final,omitempty"` + Lang string `protobuf:"bytes,2,opt,name=lang,proto3" json:"lang,omitempty"` + Texts []string `protobuf:"bytes,3,rep,name=texts,proto3" json:"texts,omitempty"` +} + +func (x *Translation) Reset() { + *x = Translation{} + if protoimpl.UnsafeEnabled { + mi := &file_chat_text_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Translation) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Translation) ProtoMessage() {} + +func (x *Translation) ProtoReflect() protoreflect.Message { + mi := &file_chat_text_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Translation.ProtoReflect.Descriptor instead. +func (*Translation) Descriptor() ([]byte, []int) { + return file_chat_text_proto_rawDescGZIP(), []int{2} +} + +func (x *Translation) GetIsFinal() bool { + if x != nil { + return x.IsFinal + } + return false +} + +func (x *Translation) GetLang() string { + if x != nil { + return x.Lang + } + return "" +} + +func (x *Translation) GetTexts() []string { + if x != nil { + return x.Texts + } + return nil +} + +var File_chat_text_proto protoreflect.FileDescriptor + +var file_chat_text_proto_rawDesc = []byte{ + 0x0a, 0x0f, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x16, 0x61, 0x67, 0x6f, 0x72, 0x61, 0x2e, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x74, 0x72, + 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x72, 0x22, 0xdf, 0x03, 0x0a, 0x04, 0x54, 0x65, + 0x78, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x76, 0x65, 0x6e, 0x64, 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x06, 0x76, 0x65, 0x6e, 0x64, 0x6f, 0x72, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, + 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x76, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x65, 0x71, 0x6e, 0x75, 0x6d, 0x18, 0x03, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x06, 0x73, 0x65, 0x71, 0x6e, 0x75, 0x6d, 0x12, 0x10, 0x0a, 0x03, + 0x75, 0x69, 0x64, 0x18, 0x04, 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x75, 0x69, 0x64, 0x12, 0x12, + 0x0a, 0x04, 0x66, 0x6c, 0x61, 0x67, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x66, 0x6c, + 0x61, 0x67, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x04, 0x74, 0x69, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6c, 0x61, 0x6e, 0x67, 0x18, 0x07, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x04, 0x6c, 0x61, 0x6e, 0x67, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x74, + 0x61, 0x72, 0x74, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x08, 0x20, 0x01, 
0x28, 0x05, 0x52, 0x09, 0x73, + 0x74, 0x61, 0x72, 0x74, 0x74, 0x69, 0x6d, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x66, 0x66, 0x74, + 0x69, 0x6d, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x6f, 0x66, 0x66, 0x74, 0x69, + 0x6d, 0x65, 0x12, 0x32, 0x0a, 0x05, 0x77, 0x6f, 0x72, 0x64, 0x73, 0x18, 0x0a, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x1c, 0x2e, 0x61, 0x67, 0x6f, 0x72, 0x61, 0x2e, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x74, + 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x72, 0x2e, 0x57, 0x6f, 0x72, 0x64, 0x52, + 0x05, 0x77, 0x6f, 0x72, 0x64, 0x73, 0x12, 0x24, 0x0a, 0x0e, 0x65, 0x6e, 0x64, 0x5f, 0x6f, 0x66, + 0x5f, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, + 0x65, 0x6e, 0x64, 0x4f, 0x66, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x1f, 0x0a, 0x0b, + 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x6d, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x0a, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x73, 0x12, 0x1b, 0x0a, + 0x09, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x0d, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x39, 0x0a, 0x05, 0x74, 0x72, + 0x61, 0x6e, 0x73, 0x18, 0x0e, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x23, 0x2e, 0x61, 0x67, 0x6f, 0x72, + 0x61, 0x2e, 0x63, 0x68, 0x61, 0x74, 0x5f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x62, + 0x65, 0x72, 0x2e, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x05, + 0x74, 0x72, 0x61, 0x6e, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x63, 0x75, 0x6c, 0x74, 0x75, 0x72, 0x65, + 0x18, 0x0f, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x63, 0x75, 0x6c, 0x74, 0x75, 0x72, 0x65, 0x12, + 0x1a, 0x0a, 0x08, 0x74, 0x65, 0x78, 0x74, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x10, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x08, 0x74, 0x65, 0x78, 0x74, 0x74, 0x69, 0x6d, 0x65, 0x22, 0x91, 0x01, 0x0a, 0x04, + 0x57, 0x6f, 0x72, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x74, 0x65, 0x78, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x74, 0x65, 0x78, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x74, 0x61, 0x72, + 0x74, 0x5f, 0x6d, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x07, 0x73, 0x74, 0x61, 0x72, + 0x74, 0x4d, 0x73, 0x12, 0x1f, 0x0a, 0x0b, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x6d, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x4d, 0x73, 0x12, 0x19, 0x0a, 0x08, 0x69, 0x73, 0x5f, 0x66, 0x69, 0x6e, 0x61, 0x6c, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x69, 0x73, 0x46, 0x69, 0x6e, 0x61, 0x6c, 0x12, + 0x1e, 0x0a, 0x0a, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x64, 0x65, 0x6e, 0x63, 0x65, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x01, 0x52, 0x0a, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x64, 0x65, 0x6e, 0x63, 0x65, 0x22, + 0x52, 0x0a, 0x0b, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x19, + 0x0a, 0x08, 0x69, 0x73, 0x5f, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x07, 0x69, 0x73, 0x46, 0x69, 0x6e, 0x61, 0x6c, 0x12, 0x12, 0x0a, 0x04, 0x6c, 0x61, 0x6e, + 0x67, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6c, 0x61, 0x6e, 0x67, 0x12, 0x14, 0x0a, + 0x05, 0x74, 0x65, 0x78, 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x09, 0x52, 0x05, 0x74, 0x65, + 0x78, 0x74, 0x73, 0x42, 0x06, 0x5a, 0x04, 0x2e, 0x3b, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x33, +} + +var ( + file_chat_text_proto_rawDescOnce sync.Once + file_chat_text_proto_rawDescData = file_chat_text_proto_rawDesc +) + +func file_chat_text_proto_rawDescGZIP() 
[]byte { + file_chat_text_proto_rawDescOnce.Do(func() { + file_chat_text_proto_rawDescData = protoimpl.X.CompressGZIP(file_chat_text_proto_rawDescData) + }) + return file_chat_text_proto_rawDescData +} + +var file_chat_text_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_chat_text_proto_goTypes = []interface{}{ + (*Text)(nil), // 0: agora.chat_transcriber.Text + (*Word)(nil), // 1: agora.chat_transcriber.Word + (*Translation)(nil), // 2: agora.chat_transcriber.Translation +} +var file_chat_text_proto_depIdxs = []int32{ + 1, // 0: agora.chat_transcriber.Text.words:type_name -> agora.chat_transcriber.Word + 2, // 1: agora.chat_transcriber.Text.trans:type_name -> agora.chat_transcriber.Translation + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_chat_text_proto_init() } +func file_chat_text_proto_init() { + if File_chat_text_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_chat_text_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Text); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_chat_text_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Word); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_chat_text_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Translation); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_chat_text_proto_rawDesc, + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_chat_text_proto_goTypes, + DependencyIndexes: file_chat_text_proto_depIdxs, + MessageInfos: file_chat_text_proto_msgTypes, + }.Build() + File_chat_text_proto = out.File + file_chat_text_proto_rawDesc = nil + file_chat_text_proto_goTypes = nil + file_chat_text_proto_depIdxs = nil +} diff --git a/agents/addon/extension/chat_transcriber/pb/chat_text.proto b/agents/addon/extension/chat_transcriber/pb/chat_text.proto new file mode 100644 index 000000000..9ee4e504b --- /dev/null +++ b/agents/addon/extension/chat_transcriber/pb/chat_text.proto @@ -0,0 +1,37 @@ +syntax = "proto3"; + +package agora.chat_transcriber; +option go_package = ".;pb"; + +message Text { + int32 vendor = 1; + int32 version = 2; + int32 seqnum = 3; + int32 uid = 4; + int32 flag = 5; + int64 time = 6; // final time =first nofinal time + int32 lang = 7; + int32 starttime = 8; + int32 offtime = 9; + repeated Word words = 10; + bool end_of_segment = 11; + int32 duration_ms = 12; + string data_type = 13; // transcribe ,translate + repeated Translation trans = 14; + string culture = 15; + int64 texttime = 16; // pkg timestamp +} + +message Word { + string text = 1; + int32 start_ms = 2; + int32 duration_ms = 3; + bool is_final = 4; + double confidence = 5; +} + +message Translation { + bool is_final = 1; + string lang = 2; + repeated string texts = 3; +} \ No newline at end of file diff --git 
a/agents/addon/extension/chat_transcriber/property.json b/agents/addon/extension/chat_transcriber/property.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/agents/addon/extension/chat_transcriber/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts.go b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts.go new file mode 100644 index 000000000..4d712e4f3 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts.go @@ -0,0 +1,82 @@ +/** + * + * Agora Real Time Engagement + * Created by XinHui Li in 2024-07. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +// Note that this is just an example extension written in the GO programming +// language, so the package name does not equal to the containing directory +// name. However, it is not common in Go. +package extension + +import ( + "context" + "fmt" + "io" + "time" + + elevenlabs "github.com/haguro/elevenlabs-go" +) + +type elevenlabsTTS struct { + client *elevenlabs.Client + config elevenlabsTTSConfig +} + +type elevenlabsTTSConfig struct { + ApiKey string + ModelId string + OptimizeStreamingLatency int + RequestTimeoutSeconds int + SimilarityBoost float32 + SpeakerBoost bool + Stability float32 + Style float32 + VoiceId string +} + +func defaultElevenlabsTTSConfig() elevenlabsTTSConfig { + return elevenlabsTTSConfig{ + ApiKey: "", + ModelId: "eleven_multilingual_v2", + OptimizeStreamingLatency: 0, + RequestTimeoutSeconds: 30, + SimilarityBoost: 0.75, + SpeakerBoost: false, + Stability: 0.5, + Style: 0.0, + VoiceId: "pNInz6obpgDQGcFmaJgB", + } +} + +func newElevenlabsTTS(config elevenlabsTTSConfig) (*elevenlabsTTS, error) { + return &elevenlabsTTS{ + config: config, + client: elevenlabs.NewClient(context.Background(), config.ApiKey, time.Duration(config.RequestTimeoutSeconds)*time.Second), + }, nil +} + +func (e *elevenlabsTTS) textToSpeechStream(streamWriter io.Writer, text string) (err error) { + req := elevenlabs.TextToSpeechRequest{ + Text: text, + ModelID: e.config.ModelId, + VoiceSettings: &elevenlabs.VoiceSettings{ + SimilarityBoost: e.config.SimilarityBoost, + SpeakerBoost: e.config.SpeakerBoost, + Stability: e.config.Stability, + Style: e.config.Style, + }, + } + queries := []elevenlabs.QueryFunc{ + elevenlabs.LatencyOptimizations(e.config.OptimizeStreamingLatency), + elevenlabs.OutputFormat("pcm_16000"), + } + + err = e.client.TextToSpeechStream(streamWriter, e.config.VoiceId, req, queries...) + if err != nil { + return fmt.Errorf("TextToSpeechStream failed, err: %v", err) + } + + return nil +} diff --git a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go new file mode 100644 index 000000000..3b6ef4fd7 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go @@ -0,0 +1,340 @@ +/** + * + * Agora Real Time Engagement + * Created by XinHui Li in 2024-07. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +// Note that this is just an example extension written in the GO programming +// language, so the package name does not equal to the containing directory +// name. However, it is not common in Go. 
+package extension + +import ( + "fmt" + "io" + "log/slog" + "sync" + "sync/atomic" + "time" + + "agora.io/rte/rtego" +) + +const ( + cmdInFlush = "flush" + cmdOutFlush = "flush" + dataInTextDataPropertyText = "text" + + propertyApiKey = "api_key" // Required + propertyModelId = "model_id" // Optional + propertyOptimizeStreamingLatency = "optimize_streaming_latency" // Optional + propertyRequestTimeoutSeconds = "request_timeout_seconds" // Optional + propertySimilarityBoost = "similarity_boost" // Optional + propertySpeakerBoost = "speaker_boost" // Optional + propertyStability = "stability" // Optional + propertyStyle = "style" // Optional + propertyVoiceId = "voice_id" // Optional +) + +const ( + textChanMax = 1024 +) + +var ( + logTag = slog.String("extension", "ELEVENLABS_TTS_EXTENSION") + + outdateTs atomic.Int64 + textChan chan *message + wg sync.WaitGroup +) + +type elevenlabsTTSExtension struct { + rtego.DefaultExtension + elevenlabsTTS *elevenlabsTTS +} + +type message struct { + text string + receivedTs int64 +} + +func newElevenlabsTTSExtension(name string) rtego.Extension { + return &elevenlabsTTSExtension{} +} + +// OnStart will be called when the extension is starting, +// properies can be read here to initialize and start the extension. +// current supported properties: +// - api_key (required) +// - model_id +// - optimize_streaming_latency +// - request_timeout_seconds +// - similarity_boost +// - speaker_boost +// - stability +// - style +// - voice_id +func (e *elevenlabsTTSExtension) OnStart(rte rtego.Rte) { + slog.Info("OnStart", logTag) + + // prepare configuration + elevenlabsTTSConfig := defaultElevenlabsTTSConfig() + + if apiKey, err := rte.GetPropertyString(propertyApiKey); err != nil { + slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) + return + } else { + elevenlabsTTSConfig.ApiKey = apiKey + } + + if modelId, err := rte.GetPropertyString(propertyModelId); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyModelId, err), logTag) + } else { + if len(modelId) > 0 { + elevenlabsTTSConfig.ModelId = modelId + } + } + + if optimizeStreamingLatency, err := rte.GetPropertyInt64(propertyOptimizeStreamingLatency); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyOptimizeStreamingLatency, err), logTag) + } else { + if optimizeStreamingLatency > 0 { + elevenlabsTTSConfig.OptimizeStreamingLatency = int(optimizeStreamingLatency) + } + } + + if requestTimeoutSeconds, err := rte.GetPropertyInt64(propertyRequestTimeoutSeconds); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyRequestTimeoutSeconds, err), logTag) + } else { + if requestTimeoutSeconds > 0 { + elevenlabsTTSConfig.RequestTimeoutSeconds = int(requestTimeoutSeconds) + } + } + + if similarityBoost, err := rte.GetPropertyFloat64(propertySimilarityBoost); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySimilarityBoost, err), logTag) + } else { + elevenlabsTTSConfig.SimilarityBoost = float32(similarityBoost) + } + + if speakerBoost, err := rte.GetPropertyBool(propertySpeakerBoost); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertySpeakerBoost, err), logTag) + } else { + elevenlabsTTSConfig.SpeakerBoost = speakerBoost + } + + if stability, err := rte.GetPropertyFloat64(propertyStability); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyStability, 
err), logTag) + } else { + elevenlabsTTSConfig.Stability = float32(stability) + } + + if style, err := rte.GetPropertyFloat64(propertyStyle); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyStyle, err), logTag) + } else { + elevenlabsTTSConfig.Style = float32(style) + } + + if voiceId, err := rte.GetPropertyString(propertyVoiceId); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyVoiceId, err), logTag) + } else { + if len(voiceId) > 0 { + elevenlabsTTSConfig.VoiceId = voiceId + } + } + + // create elevenlabsTTS instance + elevenlabsTTS, err := newElevenlabsTTS(elevenlabsTTSConfig) + if err != nil { + slog.Error(fmt.Sprintf("newElevenlabsTTS failed, err: %v", err), logTag) + return + } + + slog.Info(fmt.Sprintf("newElevenlabsTTS succeed with ModelId: %s, VoiceId: %s", + elevenlabsTTSConfig.ModelId, elevenlabsTTSConfig.VoiceId), logTag) + + // set elevenlabsTTS instance + e.elevenlabsTTS = elevenlabsTTS + + // create pcm instance + pcm := newPcm(defaultPcmConfig()) + pcmFrameSize := pcm.getPcmFrameSize() + + // init chan + textChan = make(chan *message, textChanMax) + + go func() { + slog.Info("process textChan", logTag) + + for msg := range textChan { + if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt + slog.Info(fmt.Sprintf("textChan interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", + msg.text, msg.receivedTs, outdateTs.Load()), logTag) + continue + } + + wg.Add(1) + slog.Info(fmt.Sprintf("textChan text: [%s]", msg.text), logTag) + + r, w := io.Pipe() + startTime := time.Now() + + go func() { + defer wg.Done() + defer w.Close() + + slog.Info(fmt.Sprintf("textToSpeechStream text: [%s]", msg.text), logTag) + + err = e.elevenlabsTTS.textToSpeechStream(w, msg.text) + if err != nil { + slog.Error(fmt.Sprintf("textToSpeechStream failed, err: %v", err), logTag) + return + } + }() + + slog.Info(fmt.Sprintf("read pcm stream, text:[%s], pcmFrameSize:%d", msg.text, pcmFrameSize), logTag) + + var ( + firstFrameLatency int64 + n int + pcmFrameRead int + readBytes int + sentFrames int + ) + buf := pcm.newBuf() + + // read pcm stream + for { + if msg.receivedTs < outdateTs.Load() { // Check whether to interrupt + slog.Info(fmt.Sprintf("read pcm stream interrupt and flushing for input text: [%s], receivedTs: %d, outdateTs: %d", + msg.text, msg.receivedTs, outdateTs.Load()), logTag) + break + } + + n, err = r.Read(buf[pcmFrameRead:]) + readBytes += n + pcmFrameRead += n + + if err != nil { + if err == io.EOF { + slog.Info("read pcm stream EOF", logTag) + break + } + + slog.Error(fmt.Sprintf("read pcm stream failed, err: %v", err), logTag) + break + } + + if pcmFrameRead != pcmFrameSize { + slog.Debug(fmt.Sprintf("the number of bytes read is [%d] inconsistent with pcm frame size", pcmFrameRead), logTag) + continue + } + + pcm.send(rte, buf) + // clear buf + buf = pcm.newBuf() + pcmFrameRead = 0 + sentFrames++ + + if firstFrameLatency == 0 { + firstFrameLatency = time.Since(startTime).Milliseconds() + slog.Info(fmt.Sprintf("first frame available for text: [%s], receivedTs: %d, firstFrameLatency: %dms", msg.text, msg.receivedTs, firstFrameLatency), logTag) + } + + slog.Debug(fmt.Sprintf("sending pcm data, text: [%s]", msg.text), logTag) + } + + if pcmFrameRead > 0 { + pcm.send(rte, buf) + sentFrames++ + slog.Info(fmt.Sprintf("sending pcm remain data, text: [%s], pcmFrameRead: %d", msg.text, pcmFrameRead), logTag) + } + + r.Close() + slog.Info(fmt.Sprintf("send pcm data finished, text: 
[%s], receivedTs: %d, readBytes: %d, sentFrames: %d, firstFrameLatency: %dms, finishLatency: %dms", + msg.text, msg.receivedTs, readBytes, sentFrames, firstFrameLatency, time.Since(startTime).Milliseconds()), logTag) + } + }() + + rte.OnStartDone() +} + +// OnCmd receives cmd from rte graph. +// current supported cmd: +// - name: flush +// example: +// {"name": "flush"} +func (e *elevenlabsTTSExtension) OnCmd( + rte rtego.Rte, + cmd rtego.Cmd, +) { + cmdName, err := cmd.CmdName() + if err != nil { + slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) + rte.ReturnString(rtego.Error, "error", cmd) + return + } + + slog.Info(fmt.Sprintf("OnCmd %s", cmdInFlush), logTag) + + switch cmdName { + case cmdInFlush: + outdateTs.Store(time.Now().UnixMicro()) + + // send out + outCmd, err := rtego.NewCmd(cmdOutFlush) + if err != nil { + slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) + rte.ReturnString(rtego.Error, "error", cmd) + return + } + + if err := rte.SendCmd(outCmd, nil); err != nil { + slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) + rte.ReturnString(rtego.Error, "error", cmd) + return + } else { + slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) + } + } + + rte.ReturnString(rtego.Ok, "ok", cmd) +} + +// OnData receives data from rte graph. +// current supported data: +// - name: text_data +// example: +// {name: text_data, properties: {text: "hello"} +func (e *elevenlabsTTSExtension) OnData( + rte rtego.Rte, + data rtego.Data, +) { + text, err := data.GetPropertyString(dataInTextDataPropertyText) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag) + return + } + + if len(text) == 0 { + slog.Debug("OnData text is empty, ignored", logTag) + return + } + + slog.Info(fmt.Sprintf("OnData input text: [%s]", text), logTag) + + go func() { + textChan <- &message{text: text, receivedTs: time.Now().UnixMicro()} + }() +} + +func init() { + slog.Info("elevenlabs_tts extension init", logTag) + + // Register addon + rtego.RegisterAddonAsExtension( + "elevenlabs_tts", + rtego.NewDefaultExtensionAddon(newElevenlabsTTSExtension), + ) +} diff --git a/agents/addon/extension/elevenlabs_tts/go.mod b/agents/addon/extension/elevenlabs_tts/go.mod new file mode 100644 index 000000000..bb90f1c61 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts/go.mod @@ -0,0 +1,10 @@ +module elevenlabs_tts + +go 1.21 + +replace agora.io/rte => ../../../interface + +require ( + agora.io/rte v0.0.0-00010101000000-000000000000 + github.com/haguro/elevenlabs-go v0.2.4 +) diff --git a/agents/addon/extension/elevenlabs_tts/go.sum b/agents/addon/extension/elevenlabs_tts/go.sum new file mode 100644 index 000000000..6c1feddc6 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts/go.sum @@ -0,0 +1,2 @@ +github.com/haguro/elevenlabs-go v0.2.4 h1:Z1a/I+b5fAtGSfrhEj97dYG1EbV9uRzSfvz5n5+ud34= +github.com/haguro/elevenlabs-go v0.2.4/go.mod h1:j15h9w2BpgxlIGWXmCKWPPDaTo2QAO83zFy5J+pFCt8= diff --git a/agents/addon/extension/elevenlabs_tts/manifest.json b/agents/addon/extension/elevenlabs_tts/manifest.json new file mode 100644 index 000000000..620fb2248 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts/manifest.json @@ -0,0 +1,74 @@ +{ + "type": "extension", + "name": "elevenlabs_tts", + "version": "0.1.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime", + "version": "0.1.0" + }, + { + "type": "system", + "name": 
"rte_runtime_go", + "version": "0.1.0" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "model_id": { + "type": "string" + }, + "request_timeout_seconds": { + "type": "int64" + }, + "similarity_boost": { + "type": "float64" + }, + "speaker_boost": { + "type": "bool" + }, + "stability": { + "type": "float64" + }, + "style": { + "type": "float64" + }, + "optimize_streaming_latency": { + "type": "int64" + }, + "voice_id": { + "type": "string" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ], + "pcm_frame_out": [ + { + "name": "pcm_frame" + } + ] + } +} diff --git a/agents/addon/extension/elevenlabs_tts/pcm.go b/agents/addon/extension/elevenlabs_tts/pcm.go new file mode 100644 index 000000000..c3454b102 --- /dev/null +++ b/agents/addon/extension/elevenlabs_tts/pcm.go @@ -0,0 +1,104 @@ +/** + * + * Agora Real Time Engagement + * Created by XinHui Li in 2024-07. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +// Note that this is just an example extension written in the GO programming +// language, so the package name does not equal to the containing directory +// name. However, it is not common in Go. +package extension + +import ( + "fmt" + "log/slog" + + "agora.io/rte/rtego" +) + +type pcm struct { + config *pcmConfig +} + +type pcmConfig struct { + BytesPerSample int32 + Channel int32 + ChannelLayout uint64 + Name string + SampleRate int32 + SamplesPerChannel int32 + Timestamp int64 +} + +func defaultPcmConfig() *pcmConfig { + return &pcmConfig{ + BytesPerSample: 2, + Channel: 1, + ChannelLayout: 1, + Name: "pcm_frame", + SampleRate: 16000, + SamplesPerChannel: 16000 / 100, + Timestamp: 0, + } +} + +func newPcm(config *pcmConfig) *pcm { + return &pcm{ + config: config, + } +} + +func (p *pcm) getPcmFrame(buf []byte) (pcmFrame rtego.PcmFrame, err error) { + pcmFrame, err = rtego.NewPcmFrame(p.config.Name) + if err != nil { + slog.Error(fmt.Sprintf("NewPcmFrame failed, err: %v", err), logTag) + return + } + + // set pcm frame + pcmFrame.SetBytesPerSample(p.config.BytesPerSample) + pcmFrame.SetSampleRate(p.config.SampleRate) + pcmFrame.SetChannelLayout(p.config.ChannelLayout) + pcmFrame.SetNumberOfChannels(p.config.Channel) + pcmFrame.SetTimestamp(p.config.Timestamp) + pcmFrame.SetDataFmt(rtego.PcmFrameDataFmtInterleave) + pcmFrame.SetSamplesPerChannel(p.config.SamplesPerChannel) + pcmFrame.AllocBuf(p.getPcmFrameSize()) + + borrowedBuf, err := pcmFrame.BorrowBuf() + if err != nil { + slog.Error(fmt.Sprintf("BorrowBuf failed, err: %v", err), logTag) + return + } + + // copy data + copy(borrowedBuf, buf) + + pcmFrame.GiveBackBuf(&borrowedBuf) + return +} + +func (p *pcm) getPcmFrameSize() int { + return int(p.config.SamplesPerChannel * p.config.Channel * p.config.BytesPerSample) +} + +func (p *pcm) newBuf() []byte { + return make([]byte, p.getPcmFrameSize()) +} + +func (p *pcm) send(rte rtego.Rte, buf []byte) (err error) { + pcmFrame, err := p.getPcmFrame(buf) + if err != nil { + slog.Error(fmt.Sprintf("getPcmFrame failed, err: %v", err), logTag) + return + } + + // send pcm + if err = rte.SendPcmFrame(pcmFrame); err != nil { + slog.Error(fmt.Sprintf("SendPcmFrame failed, err: %v", err), logTag) + return + } + + return +} diff --git a/agents/addon/extension/elevenlabs_tts/property.json b/agents/addon/extension/elevenlabs_tts/property.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null 
+++ b/agents/addon/extension/elevenlabs_tts/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/interrupt_detector/extension.go b/agents/addon/extension/interrupt_detector/extension.go new file mode 100644 index 000000000..962940e06 --- /dev/null +++ b/agents/addon/extension/interrupt_detector/extension.go @@ -0,0 +1,78 @@ +/** + * + * Agora Real Time Engagement + * Created by Wei Hu in 2022-10. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +// Note that this is just an example extension written in the GO programming +// language, so the package name does not equal to the containing directory +// name. However, it is not common in Go. +package extension + +import ( + "fmt" + "log/slog" + + "agora.io/rte/rtego" +) + +const ( + textDataTextField = "text" + textDataFinalField = "is_final" + + cmdNameFlush = "flush" +) + +var ( + logTag = slog.String("extension", "INTERRUPT_DETECTOR_EXTENSION") +) + +type interruptDetectorExtension struct { + rtego.DefaultExtension +} + +func newExtension(name string) rtego.Extension { + return &interruptDetectorExtension{} +} + +// OnData receives data from rte graph. +// current supported data: +// - name: text_data +// example: +// {name: text_data, properties: {text: "hello", is_final: false} +func (p *interruptDetectorExtension) OnData( + rte rtego.Rte, + data rtego.Data, +) { + text, err := data.GetPropertyString(textDataTextField) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataTextField, err), logTag) + return + } + + final, err := data.GetPropertyBool(textDataFinalField) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s error: %v", textDataFinalField, err), logTag) + return + } + + slog.Debug(fmt.Sprintf("OnData %s: %s %s: %t", textDataTextField, text, textDataFinalField, final), logTag) + + if final || len(text) >= 2 { + flushCmd, _ := rtego.NewCmd(cmdNameFlush) + rte.SendCmd(flushCmd, nil) + + slog.Info(fmt.Sprintf("sent cmd: %s", cmdNameFlush), logTag) + } +} + +func init() { + slog.Info("interrupt_detector extension init", logTag) + + // Register addon + rtego.RegisterAddonAsExtension( + "interrupt_detector", + rtego.NewDefaultExtensionAddon(newExtension), + ) +} diff --git a/agents/addon/extension/interrupt_detector/go.mod b/agents/addon/extension/interrupt_detector/go.mod new file mode 100644 index 000000000..bced26e06 --- /dev/null +++ b/agents/addon/extension/interrupt_detector/go.mod @@ -0,0 +1,7 @@ +module extension + +go 1.18 + +replace agora.io/rte => ../../../interface + +require agora.io/rte v0.0.0-00010101000000-000000000000 diff --git a/agents/addon/extension/interrupt_detector/manifest.json b/agents/addon/extension/interrupt_detector/manifest.json new file mode 100644 index 000000000..05781cf5a --- /dev/null +++ b/agents/addon/extension/interrupt_detector/manifest.json @@ -0,0 +1,38 @@ +{ + "type": "extension", + "name": "interrupt_detector", + "version": "0.1.0", + "language": "go", + "dependencies": [ + { + "type": "system", + "name": "rte_runtime", + "version": "0.1.0" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.1.0" + } + ], + "api": { + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + } + } + } + ], + "cmd_out": [ + { + "name": "flush" + } + ] + } +} \ No newline at end of file diff --git a/agents/addon/extension/interrupt_detector/property.json b/agents/addon/extension/interrupt_detector/property.json new file 
mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/agents/addon/extension/interrupt_detector/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt/README.md b/agents/addon/extension/openai_chatgpt/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/agents/addon/extension/openai_chatgpt/go.mod b/agents/addon/extension/openai_chatgpt/go.mod new file mode 100644 index 000000000..5bb6b52b8 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt/go.mod @@ -0,0 +1,17 @@ +module openai_chatgpt + +go 1.21 + +replace agora.io/rte => ../../../interface + +require ( + agora.io/rte v0.0.0-00010101000000-000000000000 + github.com/sashabaranov/go-openai v1.24.1 + github.com/stretchr/testify v1.9.0 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/agents/addon/extension/openai_chatgpt/go.sum b/agents/addon/extension/openai_chatgpt/go.sum new file mode 100644 index 000000000..64a09f354 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt/go.sum @@ -0,0 +1,12 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sashabaranov/go-openai v1.24.1 h1:DWK95XViNb+agQtuzsn+FyHhn3HQJ7Va8z04DQDJ1MI= +github.com/sashabaranov/go-openai v1.24.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/agents/addon/extension/openai_chatgpt/manifest.json b/agents/addon/extension/openai_chatgpt/manifest.json new file mode 100644 index 000000000..aa4f6d50d --- /dev/null +++ b/agents/addon/extension/openai_chatgpt/manifest.json @@ -0,0 +1,83 @@ +{ + "type": "extension", + "name": "openai_chatgpt", + "version": "0.1.0", + "language": "go", + "support": [], + "dependencies": [ + { + "type": "system", + "name": "rte_runtime", + "version": "0.1.0" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.1.0" + } + ], + "api": { + "property": { + "api_key": { + "type": "string" + }, + "frequency_penalty": { + "type": "float64" + }, + "presence_penalty": { + "type": "float64" + }, + "model": { + "type": "string" + }, + "max_tokens": { + "type": "int64" + }, + "prompt": { + "type": "string" + }, + "greeting": { + "type": "string" + }, + "max_memory_length": { + "type": "int64" + } + }, + "data_in": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + } + } + } + ], + "data_out": [ + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "end_of_segment": { + "type": "bool" + } + } + } + ], + "cmd_in": [ + { + "name": "flush" + } + ], + "cmd_out": [ + { + "name": "flush" + } + ] 
+ } +} \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt/openai_chatgpt.go b/agents/addon/extension/openai_chatgpt/openai_chatgpt.go new file mode 100644 index 000000000..1a09ed141 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt/openai_chatgpt.go @@ -0,0 +1,111 @@ +/** + * + * Agora Real Time Engagement + * Created by lixinhui in 2024. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +// Note that this is just an example extension written in the GO programming +// language, so the package name does not match the containing directory +// name; this is not common in Go. +package extension + +import ( + "context" + "fmt" + "math/rand" + "net/http" + "net/url" + + openai "github.com/sashabaranov/go-openai" +) + +type openaiChatGPT struct { + client *openai.Client + config openaiChatGPTConfig +} + +type openaiChatGPTConfig struct { + BaseUrl string + ApiKey string + + Model string + Prompt string + + FrequencyPenalty float32 + PresencePenalty float32 + TopP float32 + Temperature float32 + MaxTokens int + Seed int + + ProxyUrl string +} + +func defaultOpenaiChatGPTConfig() openaiChatGPTConfig { + return openaiChatGPTConfig{ + BaseUrl: "https://api.openai.com/v1", + ApiKey: "", + + Model: openai.GPT4o, + Prompt: "You are a voice assistant who talks in a conversational way and can chat with me like a friend. I will speak to you in English or Chinese, and you will answer with a corrected and improved version of my text, in the language I use. Don't talk like a robot; instead, I would like you to talk like a real human with emotions. I will use your answer for text-to-speech, so don't return any meaningless characters. I want you to be helpful: when I ask you for advice, give me precise, practical and useful advice instead of being vague. 
When giving me a list of options, express the options in a narrative way instead of bullet points.", + + FrequencyPenalty: 0.9, + PresencePenalty: 0.9, + TopP: 1.0, + Temperature: 0.1, + MaxTokens: 512, + Seed: rand.Int(), + + ProxyUrl: "", + } +} + +func newOpenaiChatGPT(config openaiChatGPTConfig) (*openaiChatGPT, error) { + conf := openai.DefaultConfig(config.ApiKey) + + if config.BaseUrl != "" { + conf.BaseURL = config.BaseUrl + } + + if config.ProxyUrl != "" { + proxyUrl, err := url.Parse(config.ProxyUrl) + if err != nil { + return nil, fmt.Errorf("newOpenaiChatGPT failed on parsing proxy url, err: %v", err) + } + conf.HTTPClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyUrl)}} + } + + return &openaiChatGPT{ + config: config, + client: openai.NewClientWithConfig(conf), + }, nil +} + +func (c *openaiChatGPT) getChatCompletionsStream(messages []openai.ChatCompletionMessage) (*openai.ChatCompletionStream, error) { + req := openai.ChatCompletionRequest{ + Temperature: c.config.Temperature, + TopP: c.config.TopP, + PresencePenalty: c.config.PresencePenalty, + FrequencyPenalty: c.config.FrequencyPenalty, + MaxTokens: c.config.MaxTokens, + Seed: &c.config.Seed, + Messages: append( + []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleSystem, + Content: c.config.Prompt, + }, + }, + messages..., + ), + Model: c.config.Model, + Stream: true, + } + + resp, err := c.client.CreateChatCompletionStream(context.Background(), req) + if err != nil { + return nil, fmt.Errorf("CreateChatCompletionStream failed, err: %v", err) + } + return resp, nil +} diff --git a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go new file mode 100644 index 000000000..6abdaa515 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go @@ -0,0 +1,391 @@ +/** + * + * Agora Real Time Engagement + * Created by lixinhui in 2024. + * Copyright (c) 2024 Agora IO. All rights reserved. + * + */ +// Note that this is just an example extension written in the GO programming +// language, so the package name does not match the containing directory +// name; this is not common in Go.
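+//
+// For orientation, a minimal usage sketch of the client defined in
+// openai_chatgpt.go above (error handling elided; the apiKey value is a
+// placeholder, normally supplied through the "api_key" property):
+//
+//	config := defaultOpenaiChatGPTConfig()
+//	config.ApiKey = "<api-key>"
+//	gpt, _ := newOpenaiChatGPT(config)
+//	stream, _ := gpt.getChatCompletionsStream([]openai.ChatCompletionMessage{
+//		{Role: openai.ChatMessageRoleUser, Content: "hello"},
+//	})
+//	defer stream.Close()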
+package extension + +import ( + "errors" + "fmt" + "io" + "log/slog" + "sync" + "sync/atomic" + "time" + + "agora.io/rte/rtego" + openai "github.com/sashabaranov/go-openai" +) + +var ( + logTag = slog.String("extension", "OPENAI_CHATGPT_EXTENSION") +) + +type openaiChatGPTExtension struct { + rtego.DefaultExtension + openaiChatGPT *openaiChatGPT +} + +const ( + cmdInFlush = "flush" + cmdOutFlush = "flush" + dataInTextDataPropertyText = "text" + dataInTextDataPropertyIsFinal = "is_final" + dataOutTextDataPropertyText = "text" + dataOutTextDataPropertyTextEndOfSegment = "end_of_segment" + + propertyBaseUrl = "base_url" // Optional + propertyApiKey = "api_key" // Required + propertyModel = "model" // Optional + propertyPrompt = "prompt" // Optional + propertyFrequencyPenalty = "frequency_penalty" // Optional + propertyPresencePenalty = "presence_penalty" // Optional + propertyTemperature = "temperature" // Optional + propertyTopP = "top_p" // Optional + propertyMaxTokens = "max_tokens" // Optional + propertyGreeting = "greeting" // Optional + propertyProxyUrl = "proxy_url" // Optional + propertyMaxMemoryLength = "max_memory_length" // Optional +) + +var ( + memory []openai.ChatCompletionMessage + memoryChan chan openai.ChatCompletionMessage + maxMemoryLength = 10 + + outdateTs atomic.Int64 + wg sync.WaitGroup +) + +func newChatGPTExtension(name string) rtego.Extension { + return &openaiChatGPTExtension{} +} + +// OnStart will be called when the extension is starting, +// properties can be read here to initialize and start the extension. +// current supported properties: +// - api_key (required) +// - model +// - prompt +// - frequency_penalty +// - presence_penalty +// - temperature +// - top_p +// - max_tokens +// - greeting +// - proxy_url +func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { + slog.Info("OnStart", logTag) + + // prepare configuration + openaiChatGPTConfig := defaultOpenaiChatGPTConfig() + + if baseUrl, err := rte.GetPropertyString(propertyBaseUrl); err != nil { + slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyBaseUrl, err), logTag) + } else { + if len(baseUrl) > 0 { + openaiChatGPTConfig.BaseUrl = baseUrl + } + } + + if apiKey, err := rte.GetPropertyString(propertyApiKey); err != nil { + slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) + return + } else { + openaiChatGPTConfig.ApiKey = apiKey + } + + if model, err := rte.GetPropertyString(propertyModel); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyModel, err), logTag) + } else { + if len(model) > 0 { + openaiChatGPTConfig.Model = model + } + } + + if prompt, err := rte.GetPropertyString(propertyPrompt); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyPrompt, err), logTag) + } else { + if len(prompt) > 0 { + openaiChatGPTConfig.Prompt = prompt + } + } + + if frequencyPenalty, err := rte.GetPropertyFloat64(propertyFrequencyPenalty); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyFrequencyPenalty, err), logTag) + } else { + openaiChatGPTConfig.FrequencyPenalty = float32(frequencyPenalty) + } + + if presencePenalty, err := rte.GetPropertyFloat64(propertyPresencePenalty); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyPresencePenalty, err), logTag) + } else { + openaiChatGPTConfig.PresencePenalty = float32(presencePenalty) + } + + if temperature, err := rte.GetPropertyFloat64(propertyTemperature); err 
!= nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTemperature, err), logTag) + } else { + openaiChatGPTConfig.Temperature = float32(temperature) + } + + if topP, err := rte.GetPropertyFloat64(propertyTopP); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTopP, err), logTag) + } else { + openaiChatGPTConfig.TopP = float32(topP) + } + + if maxTokens, err := rte.GetPropertyInt64(propertyMaxTokens); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxTokens, err), logTag) + } else { + if maxTokens > 0 { + openaiChatGPTConfig.MaxTokens = int(maxTokens) + } + } + + if proxyUrl, err := rte.GetPropertyString(propertyProxyUrl); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyProxyUrl, err), logTag) + } else { + openaiChatGPTConfig.ProxyUrl = proxyUrl + } + + greeting, err := rte.GetPropertyString(propertyGreeting) + if err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyGreeting, err), logTag) + } + + if propMaxMemoryLength, err := rte.GetPropertyInt64(propertyMaxMemoryLength); err != nil { + slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxMemoryLength, err), logTag) + } else { + if propMaxMemoryLength > 0 { + maxMemoryLength = int(propMaxMemoryLength) + } + } + + // create openaiChatGPT instance + openaiChatgpt, err := newOpenaiChatGPT(openaiChatGPTConfig) + if err != nil { + slog.Error(fmt.Sprintf("newOpenaiChatGPT failed, err: %v", err), logTag) + return + } + slog.Info(fmt.Sprintf("newOpenaiChatGPT succeed with max_tokens: %d, model: %s", + openaiChatGPTConfig.MaxTokens, openaiChatGPTConfig.Model), logTag) + + p.openaiChatGPT = openaiChatgpt + + memoryChan = make(chan openai.ChatCompletionMessage, maxMemoryLength*2) + + // send greeting if available + if len(greeting) > 0 { + outputData, _ := rtego.NewData("text_data") + outputData.SetProperty(dataOutTextDataPropertyText, greeting) + outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) + if err := rte.SendData(outputData); err != nil { + slog.Error(fmt.Sprintf("greeting [%s] send failed, err: %v", greeting, err), logTag) + } else { + slog.Info(fmt.Sprintf("greeting [%s] sent", greeting), logTag) + } + } + + rte.OnStartDone() +} + +// OnCmd receives cmd from rte graph. +// current supported cmd: +// - name: flush +// example: +// {"name": "flush"} +func (p *openaiChatGPTExtension) OnCmd( + rte rtego.Rte, + cmd rtego.Cmd, +) { + cmdName, err := cmd.CmdName() + if err != nil { + slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) + rte.ReturnString(rtego.Error, "error", cmd) + return + } + slog.Info(fmt.Sprintf("OnCmd %s", cmdName), logTag) + + switch cmdName { + case cmdInFlush: + outdateTs.Store(time.Now().UnixMicro()) + + wg.Wait() // wait for chat completion stream to finish + + // send out + outCmd, err := rtego.NewCmd(cmdOutFlush) + if err != nil { + slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) + rte.ReturnString(rtego.Error, "error", cmd) + return + } + if err := rte.SendCmd(outCmd, nil); err != nil { + slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) + rte.ReturnString(rtego.Error, "error", cmd) + return + } else { + slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) + } + } + rte.ReturnString(rtego.Ok, "ok", cmd) +} + +// OnData receives data from rte graph. 
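+// In sketch form, the streaming loop in OnData below chops each model delta
+// into sentences with parseSentence (defined in sentence.go) and forwards
+// every completed sentence as its own text_data packet; send() here is a
+// stand-in for the rtego plumbing shown in the actual body:
+//
+//	for {
+//		sentence, content, final = parseSentence(sentence, content)
+//		if len(sentence) == 0 || !final {
+//			break // incomplete sentence: keep accumulating deltas
+//		}
+//		send(sentence)
+//		sentence = ""
+//	}
+//
+// A goroutine started at time t abandons its stream once t < outdateTs, which
+// is the contract OnCmd's flush handling above relies on.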
+// current supported data: +// - name: text_data +// example: +// {"name": "text_data", "properties": {"text": "hello", "is_final": true}} +func (p *openaiChatGPTExtension) OnData( + rte rtego.Rte, + data rtego.Data, +) { + // Get isFinal + isFinal, err := data.GetPropertyBool(dataInTextDataPropertyIsFinal) + if err != nil { + slog.Warn(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyIsFinal, err), logTag) + return + } + if !isFinal { // ignore non-final + slog.Debug("ignore non-final input", logTag) + return + } + + // Get input text + inputText, err := data.GetPropertyString(dataInTextDataPropertyText) + if err != nil { + slog.Error(fmt.Sprintf("OnData GetProperty %s failed, err: %v", dataInTextDataPropertyText, err), logTag) + return + } + if len(inputText) == 0 { + slog.Debug("ignore empty text", logTag) + return + } + slog.Info(fmt.Sprintf("OnData input text: [%s]", inputText), logTag) + + // prepare memory + for len(memoryChan) > 0 { + m, ok := <-memoryChan + if !ok { + break + } + memory = append(memory, m) + if len(memory) > maxMemoryLength { + memory = memory[1:] + } + } + memory = append(memory, openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleUser, + Content: inputText, + }) + if len(memory) > maxMemoryLength { + memory = memory[1:] + } + + // start goroutine to request and read responses from openai + wg.Add(1) + go func(startTime time.Time, inputText string, memory []openai.ChatCompletionMessage) { + defer wg.Done() + slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] memory: %v", inputText, memory), logTag) + + // Get result from ai + resp, err := p.openaiChatGPT.getChatCompletionsStream(memory) + if err != nil { + slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] failed, err: %v", inputText, err), logTag) + return + } + defer func() { + if resp != nil { // Close stream object + resp.Close() + } + }() + slog.Debug(fmt.Sprintf("GetChatCompletionsStream start to recv for input text: [%s]", inputText), logTag) + + var sentence, fullContent string + var firstSentenceSent bool + for { + if startTime.UnixMicro() < outdateTs.Load() { // Check whether to interrupt + slog.Info(fmt.Sprintf("GetChatCompletionsStream recv interrupt and flushing for input text: [%s], startTs: %d, outdateTs: %d", + inputText, startTime.UnixMicro(), outdateTs.Load()), logTag) + break + } + + chatCompletions, err := resp.Recv() + if errors.Is(err, io.EOF) { + slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s], io.EOF break", inputText), logTag) + break + } + + var content string + if len(chatCompletions.Choices) > 0 && chatCompletions.Choices[0].Delta.Content != "" { + content = chatCompletions.Choices[0].Delta.Content + } + fullContent += content + + for { + // feed content and check whether sentence is available + var sentenceIsFinal bool + sentence, content, sentenceIsFinal = parseSentence(sentence, content) + if len(sentence) == 0 || !sentenceIsFinal { + slog.Debug(fmt.Sprintf("sentence %s is empty or not final", sentence), logTag) + break + } + slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] got sentence: [%s]", inputText, sentence), logTag) + + // send sentence + outputData, err := rtego.NewData("text_data") + if err != nil { + slog.Error(fmt.Sprintf("NewData failed, err: %v", err), logTag) + break + } + outputData.SetProperty(dataOutTextDataPropertyText, sentence) + outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, false) + if err := rte.SendData(outputData); err 
!= nil { + slog.Error(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] send sentence [%s] failed, err: %v", inputText, sentence, err), logTag) + break + } else { + slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] sent sentence [%s]", inputText, sentence), logTag) + } + sentence = "" + + if !firstSentenceSent { + firstSentenceSent = true + slog.Info(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] first sentence sent, first_sentence_latency %dms", + inputText, time.Since(startTime).Milliseconds()), logTag) + } + } + } + + // remember response as assistant content in memory + memoryChan <- openai.ChatCompletionMessage{ + Role: openai.ChatMessageRoleAssistant, + Content: fullContent, + } + + // send end of segment + outputData, _ := rtego.NewData("text_data") + outputData.SetProperty(dataOutTextDataPropertyText, sentence) + outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) + if err := rte.SendData(outputData); err != nil { + slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] send failed, err: %v", inputText, sentence, err), logTag) + } else { + slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] sent", inputText, sentence), logTag) + } + }(time.Now(), inputText, append([]openai.ChatCompletionMessage{}, memory...)) +} + +func init() { + slog.Info("init") + + // Register addon + rtego.RegisterAddonAsExtension( + "openai_chatgpt", + rtego.NewDefaultExtensionAddon(newChatGPTExtension), + ) +} diff --git a/agents/addon/extension/openai_chatgpt/property.json b/agents/addon/extension/openai_chatgpt/property.json new file mode 100644 index 000000000..9e26dfeeb --- /dev/null +++ b/agents/addon/extension/openai_chatgpt/property.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/agents/addon/extension/openai_chatgpt/sentence.go b/agents/addon/extension/openai_chatgpt/sentence.go new file mode 100644 index 000000000..e9b9d3104 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt/sentence.go @@ -0,0 +1,30 @@ +package extension + +func isPunctuation(r rune) bool { + if r == ',' || r == ',' || + r == '.' || r == '。' || + r == '?' || r == '?' || + r == '!' || r == '!' 
{ + return true + } + return false +} + +func parseSentence(sentence, content string) (string, string, bool) { + var remain string + var foundPunc bool + + for _, r := range content { + if !foundPunc { + sentence += string(r) + } else { + remain += string(r) + } + + if !foundPunc && isPunctuation(r) { + foundPunc = true + } + } + + return sentence, remain, foundPunc +} diff --git a/agents/addon/extension/openai_chatgpt/sentence_test.go b/agents/addon/extension/openai_chatgpt/sentence_test.go new file mode 100644 index 000000000..b09fe3076 --- /dev/null +++ b/agents/addon/extension/openai_chatgpt/sentence_test.go @@ -0,0 +1,150 @@ +package extension + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestIsPunctuation(t *testing.T) { + cases := []struct { + r rune + expect bool + }{ + {',', true}, + {',', true}, + {'.', true}, + {'。', true}, + {'?', true}, + {'?', true}, + {'!', true}, + {'!', true}, + + {'a', false}, + {'0', false}, + } + + for i, c := range cases { + require.Equal(t, c.expect, isPunctuation(c.r), "case %d", i) + } +} + +func TestSplitByPunctuation(t *testing.T) { + cases := []struct { + s string + expect []string + }{ + {"Hello world!", []string{"Hello world"}}, + {"Hey, there!", []string{"Hey", " there"}}, + } + + for i, c := range cases { + out := strings.FieldsFunc(c.s, isPunctuation) + require.Equal(t, c.expect, out, "case %d", i) + } +} + +func TestParseSentence_Should_NoFinalSentence(t *testing.T) { + cases := []struct { + sentence string + content string + + expectSentence string + expectContent string + }{ + { + sentence: "", + content: "", + expectSentence: "", + expectContent: "", + }, + { + sentence: "a", + content: "", + expectSentence: "a", + expectContent: "", + }, + { + sentence: "", + content: "a", + expectSentence: "a", + expectContent: "", + }, + { + sentence: "abc", + content: "ddd", + expectSentence: "abcddd", + expectContent: "", + }, + } + + for i, c := range cases { + sentence, content, final := parseSentence(c.sentence, c.content) + require.False(t, final, "case %d", i) + + require.Equal(t, c.expectSentence, sentence, "case %d", i) + require.Equal(t, c.expectContent, content, "case %d", i) + } +} + +func TestParseSentence_Should_FinalSentence(t *testing.T) { + cases := []struct { + sentence string + content string + + expectSentence string + expectContent string + }{ + { + sentence: "", + content: ",", + expectSentence: ",", + expectContent: "", + }, + { + sentence: "", + content: ",ddd", + expectSentence: ",", + expectContent: "ddd", + }, + { + sentence: "abc", + content: ",ddd", + expectSentence: "abc,", + expectContent: "ddd", + }, + { + sentence: "abc", + content: "dd,d", + expectSentence: "abcdd,", + expectContent: "d", + }, + { + sentence: "abc", + content: "ddd,", + expectSentence: "abcddd,", + expectContent: "", + }, + { + sentence: "abc", + content: "ddd,eee,fff,", + expectSentence: "abcddd,", + expectContent: "eee,fff,", + }, + { + sentence: "我的", + content: "你好,啊!", + expectSentence: "我的你好,", + expectContent: "啊!", + }, + } + + for i, c := range cases { + sentence, content, final := parseSentence(c.sentence, c.content) + require.True(t, final, "case %d", i) + + require.Equal(t, c.expectSentence, sentence, "case %d", i) + require.Equal(t, c.expectContent, content, "case %d", i) + } +} From ffe92851a8499ebf722bdc80116477b296aea289 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 15:33:00 +0000 Subject: [PATCH 55/72] fix: build with 0.3 runtime --- Makefile | 2 +- agents/.gitignore | 2 + 
.../addon/extension/azure_tts/manifest.json | 2 +- agents/addon/extension/azure_tts/src/main.cc | 22 +++--- .../extension/chat_transcriber/extension.go | 18 ++--- .../elevenlabs_tts_extension.go | 38 +++++----- agents/addon/extension/elevenlabs_tts/pcm.go | 18 ++--- .../extension/interrupt_detector/extension.go | 18 ++--- .../openai_chatgpt_extension.go | 76 ++++++++++--------- agents/main.go | 20 ++--- agents/manifest.json.en.example | 26 +++++-- agents/scripts/install_deps_and_build.sh | 23 +++++- 12 files changed, 151 insertions(+), 114 deletions(-) diff --git a/Makefile b/Makefile index edec54213..a27187110 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ build: build-agents build-server build-agents: @echo ">> build agents" - cd agents && ./scripts/install_deps_and_build.sh linux x64 + cd agents && ./scripts/install_deps_and_build.sh linux x64 && mv ./bin/main ./bin/worker @echo ">> done" build-server: diff --git a/agents/.gitignore b/agents/.gitignore index 460c5c138..46de78875 100644 --- a/agents/.gitignore +++ b/agents/.gitignore @@ -1,7 +1,9 @@ *.log addon/extension_group/ addon/extension/agora_rtc +addon/extension/py_init_extension_cpp addon/system +.rte agoradns.dat agorareport.dat bin/ diff --git a/agents/addon/extension/azure_tts/manifest.json b/agents/addon/extension/azure_tts/manifest.json index c1b72bb80..5064033bd 100644 --- a/agents/addon/extension/azure_tts/manifest.json +++ b/agents/addon/extension/azure_tts/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime", - "version": "0.2.0" + "version": "0.3.1" }, { "type": "system", diff --git a/agents/addon/extension/azure_tts/src/main.cc b/agents/addon/extension/azure_tts/src/main.cc index d716b5b08..272fc47d9 100644 --- a/agents/addon/extension/azure_tts/src/main.cc +++ b/agents/addon/extension/azure_tts/src/main.cc @@ -13,10 +13,6 @@ #include "log.h" #include "macro/check.h" -#include "rte_runtime/binding/cpp/internal/msg/cmd/cmd.h" -#include "rte_runtime/binding/cpp/internal/msg/cmd_result.h" -#include "rte_runtime/binding/cpp/internal/msg/pcm_frame.h" -#include "rte_runtime/binding/cpp/internal/rte_proxy.h" #include "rte_runtime/binding/cpp/rte.h" #include "tts.h" @@ -32,7 +28,7 @@ class azure_tts_extension_t : public rte::extension_t { // - azure_subscription_key // - azure_subscription_region // - azure_synthesis_voice_name - void on_start(rte::rte_t &rte) override { + void on_start(rte::rte_env_t &rte) override { AZURE_TTS_LOGI("start"); // read properties @@ -46,7 +42,7 @@ class azure_tts_extension_t : public rte::extension_t { return; } - rte_proxy_ = std::unique_ptr<rte::rte_proxy_t>(rte::rte_proxy_t::create(rte)); + rte_proxy_ = std::unique_ptr<rte::rte_env_proxy_t>(rte::rte_env_proxy_t::create(rte)); RTE_ASSERT(rte_proxy_ != nullptr, "rte_proxy should not be nullptr"); // pcm parameters @@ -80,7 +76,7 @@ class azure_tts_extension_t : public rte::extension_t { auto pcm_frame_shared = std::make_shared<std::unique_ptr<rte::pcm_frame_t>>(std::move(pcm_frame)); rte_proxy->notify( - [frame = std::move(pcm_frame_shared)](rte::rte_t &rte) { rte.send_pcm_frame(std::move(*frame)); }); + [frame = std::move(pcm_frame_shared)](rte::rte_env_t &rte) { rte.send_pcm_frame(std::move(*frame)); }); }; @@ -105,7 +101,7 @@ // - name: flush // example: // {"name": "flush"} - void on_cmd(rte::rte_t &rte, std::unique_ptr<rte::cmd_t> cmd) override { + void on_cmd(rte::rte_env_t &rte, std::unique_ptr<rte::cmd_t> cmd) override { std::string command = cmd->get_name(); AZURE_TTS_LOGI("%s", command.c_str()); @@ -116,8 +112,8 @@ class azure_tts_extension_t : 
public rte::extension_t { // passthrough cmd auto ret = rte.send_cmd(rte::cmd_t::create(kCmdNameFlush.c_str())); - if (ret != RTE_STATUS_CODE_OK) { - AZURE_TTS_LOGE("Failed to send cmd %s, ret:%d", kCmdNameFlush.c_str(), int(ret)); + if (!ret) { + AZURE_TTS_LOGE("Failed to send cmd %s", kCmdNameFlush.c_str()); rte.return_result(rte::cmd_result_t::create(RTE_STATUS_CODE_ERROR), std::move(cmd)); } else { rte.return_result(rte::cmd_result_t::create(RTE_STATUS_CODE_OK), std::move(cmd)); @@ -132,7 +128,7 @@ class azure_tts_extension_t : public rte::extension_t { // - name: text_data // example: // {"name": "text_data", "properties": {"text": "hello"}} - void on_data(rte::rte_t &rte, std::unique_ptr<rte::data_t> data) override { + void on_data(rte::rte_env_t &rte, std::unique_ptr<rte::data_t> data) override { auto text = data->get_property_string(kDataFieldText.c_str()); if (text.empty()) { @@ -146,7 +142,7 @@ class azure_tts_extension_t : public rte::extension_t { } // on_stop will be called when the extension is stopping. - void on_stop(rte::rte_t &rte) override { + void on_stop(rte::rte_env_t &rte) override { AZURE_TTS_LOGI("stop"); if (azure_tts_) { azure_tts_->Stop(); @@ -160,7 +156,7 @@ } private: - std::unique_ptr<rte::rte_proxy_t> rte_proxy_; + std::unique_ptr<rte::rte_env_proxy_t> rte_proxy_; std::unique_ptr<AzureTTS> azure_tts_; diff --git a/agents/addon/extension/chat_transcriber/extension.go b/agents/addon/extension/chat_transcriber/extension.go index 33fba48ab..d684e7c60 100644 --- a/agents/addon/extension/chat_transcriber/extension.go +++ b/agents/addon/extension/chat_transcriber/extension.go @@ -13,7 +13,7 @@ import ( "log/slog" "time" - "agora.io/rte/rtego" + "agora.io/rte/rte" "google.golang.org/protobuf/proto" ) @@ -29,12 +29,12 @@ var ( ) type chatTranscriberExtension struct { - rtego.DefaultExtension + rte.DefaultExtension cachedTextMap map[uint32]string // record the cached text data for each stream id } -func newExtension(name string) rtego.Extension { +func newExtension(name string) rte.Extension { return &chatTranscriberExtension{ cachedTextMap: make(map[uint32]string), } @@ -46,8 +46,8 @@ func newExtension(name string) rtego.Extension { // example: // {"name": "text_data", "properties": {"text": "hello", "is_final": true, "stream_id": 123, "end_of_segment": true}} func (p *chatTranscriberExtension) OnData( - rte rtego.Rte, - data rtego.Data, + rteEnv rte.RteEnv, + data rte.Data, ) { // Get the text data from data. text, err := data.GetPropertyString(textDataTextField) @@ -126,22 +126,22 @@ func (p *chatTranscriberExtension) OnData( } // convert the origin text data to the protobuf data and send it to the graph. 
- rteData, err := rtego.NewData("data") + rteData, err := rte.NewData("data") rteData.SetPropertyBytes("data", pbData) if err != nil { slog.Warn(fmt.Sprintf("OnData NewData error: %v", err), logTag) return } - rte.SendData(rteData) + rteEnv.SendData(rteData) } func init() { slog.Info("chat_transcriber extension init", logTag) // Register addon - rtego.RegisterAddonAsExtension( + rte.RegisterAddonAsExtension( "chat_transcriber", - rtego.NewDefaultExtensionAddon(newExtension), + rte.NewDefaultExtensionAddon(newExtension), ) } diff --git a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go index 3b6ef4fd7..4ca3b9de7 100644 --- a/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go +++ b/agents/addon/extension/elevenlabs_tts/elevenlabs_tts_extension.go @@ -18,7 +18,7 @@ import ( "sync/atomic" "time" - "agora.io/rte/rtego" + "agora.io/rte/rte" ) const ( @@ -50,7 +50,7 @@ var ( ) type elevenlabsTTSExtension struct { - rtego.DefaultExtension + rte.DefaultExtension elevenlabsTTS *elevenlabsTTS } @@ -59,7 +59,7 @@ type message struct { receivedTs int64 } -func newElevenlabsTTSExtension(name string) rtego.Extension { +func newElevenlabsTTSExtension(name string) rte.Extension { return &elevenlabsTTSExtension{} } @@ -75,7 +75,7 @@ func newElevenlabsTTSExtension(name string) rtego.Extension { // - stability // - style // - voice_id -func (e *elevenlabsTTSExtension) OnStart(rte rtego.Rte) { +func (e *elevenlabsTTSExtension) OnStart(rte rte.RteEnv) { slog.Info("OnStart", logTag) // prepare configuration @@ -266,13 +266,14 @@ func (e *elevenlabsTTSExtension) OnStart(rte rtego.Rte) { // example: // {"name": "flush"} func (e *elevenlabsTTSExtension) OnCmd( - rte rtego.Rte, - cmd rtego.Cmd, + rteEnv rte.RteEnv, + cmd rte.Cmd, ) { - cmdName, err := cmd.CmdName() + cmdName, err := cmd.GetName() if err != nil { slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } @@ -283,23 +284,26 @@ func (e *elevenlabsTTSExtension) OnCmd( outdateTs.Store(time.Now().UnixMicro()) // send out - outCmd, err := rtego.NewCmd(cmdOutFlush) + outCmd, err := rte.NewCmd(cmdOutFlush) if err != nil { slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } - if err := rte.SendCmd(outCmd, nil); err != nil { + if err := rteEnv.SendCmd(outCmd, nil); err != nil { slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } else { slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) } } - rte.ReturnString(rtego.Ok, "ok", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Ok) + rteEnv.ReturnResult(cmdResult, cmd) } // OnData receives data from rte graph. 
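A minimal sketch of the 0.3.x command-result flow these hunks migrate to, replacing the removed ReturnString (names exactly as introduced by this patch):

	cmdResult, _ := rte.NewCmdResult(rte.Ok) // or rte.NewCmdResult(rte.Error) on failure
	rteEnv.ReturnResult(cmdResult, cmd)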
@@ -308,8 +312,8 @@ func (e *elevenlabsTTSExtension) OnCmd( // example: // {name: text_data, properties: {text: "hello"} func (e *elevenlabsTTSExtension) OnData( - rte rtego.Rte, - data rtego.Data, + rteEnv rte.RteEnv, + data rte.Data, ) { text, err := data.GetPropertyString(dataInTextDataPropertyText) if err != nil { @@ -333,8 +337,8 @@ func init() { slog.Info("elevenlabs_tts extension init", logTag) // Register addon - rtego.RegisterAddonAsExtension( + rte.RegisterAddonAsExtension( "elevenlabs_tts", - rtego.NewDefaultExtensionAddon(newElevenlabsTTSExtension), + rte.NewDefaultExtensionAddon(newElevenlabsTTSExtension), ) } diff --git a/agents/addon/extension/elevenlabs_tts/pcm.go b/agents/addon/extension/elevenlabs_tts/pcm.go index c3454b102..c135d9d40 100644 --- a/agents/addon/extension/elevenlabs_tts/pcm.go +++ b/agents/addon/extension/elevenlabs_tts/pcm.go @@ -14,7 +14,7 @@ import ( "fmt" "log/slog" - "agora.io/rte/rtego" + "agora.io/rte/rte" ) type pcm struct { @@ -49,8 +49,8 @@ func newPcm(config *pcmConfig) *pcm { } } -func (p *pcm) getPcmFrame(buf []byte) (pcmFrame rtego.PcmFrame, err error) { - pcmFrame, err = rtego.NewPcmFrame(p.config.Name) +func (p *pcm) getPcmFrame(buf []byte) (pcmFrame rte.PcmFrame, err error) { + pcmFrame, err = rte.NewPcmFrame(p.config.Name) if err != nil { slog.Error(fmt.Sprintf("NewPcmFrame failed, err: %v", err), logTag) return @@ -62,20 +62,20 @@ func (p *pcm) getPcmFrame(buf []byte) (pcmFrame rtego.PcmFrame, err error) { pcmFrame.SetChannelLayout(p.config.ChannelLayout) pcmFrame.SetNumberOfChannels(p.config.Channel) pcmFrame.SetTimestamp(p.config.Timestamp) - pcmFrame.SetDataFmt(rtego.PcmFrameDataFmtInterleave) + pcmFrame.SetDataFmt(rte.PcmFrameDataFmtInterleave) pcmFrame.SetSamplesPerChannel(p.config.SamplesPerChannel) pcmFrame.AllocBuf(p.getPcmFrameSize()) - borrowedBuf, err := pcmFrame.BorrowBuf() + borrowedBuf, err := pcmFrame.LockBuf() if err != nil { - slog.Error(fmt.Sprintf("BorrowBuf failed, err: %v", err), logTag) + slog.Error(fmt.Sprintf("LockBuf failed, err: %v", err), logTag) return } // copy data copy(borrowedBuf, buf) - pcmFrame.GiveBackBuf(&borrowedBuf) + pcmFrame.UnlockBuf(&borrowedBuf) return } @@ -87,7 +87,7 @@ func (p *pcm) newBuf() []byte { return make([]byte, p.getPcmFrameSize()) } -func (p *pcm) send(rte rtego.Rte, buf []byte) (err error) { +func (p *pcm) send(rteEnv rte.RteEnv, buf []byte) (err error) { pcmFrame, err := p.getPcmFrame(buf) if err != nil { slog.Error(fmt.Sprintf("getPcmFrame failed, err: %v", err), logTag) @@ -95,7 +95,7 @@ func (p *pcm) send(rte rtego.Rte, buf []byte) (err error) { } // send pcm - if err = rte.SendPcmFrame(pcmFrame); err != nil { + if err = rteEnv.SendPcmFrame(pcmFrame); err != nil { slog.Error(fmt.Sprintf("SendPcmFrame failed, err: %v", err), logTag) return } diff --git a/agents/addon/extension/interrupt_detector/extension.go b/agents/addon/extension/interrupt_detector/extension.go index 962940e06..8fdc201bd 100644 --- a/agents/addon/extension/interrupt_detector/extension.go +++ b/agents/addon/extension/interrupt_detector/extension.go @@ -14,7 +14,7 @@ import ( "fmt" "log/slog" - "agora.io/rte/rtego" + "agora.io/rte/rte" ) const ( @@ -29,10 +29,10 @@ var ( ) type interruptDetectorExtension struct { - rtego.DefaultExtension + rte.DefaultExtension } -func newExtension(name string) rtego.Extension { +func newExtension(name string) rte.Extension { return &interruptDetectorExtension{} } @@ -42,8 +42,8 @@ func newExtension(name string) rtego.Extension { // example: // {name: text_data, properties: 
{text: "hello", is_final: false} func (p *interruptDetectorExtension) OnData( - rte rtego.Rte, - data rtego.Data, + rteEnv rte.RteEnv, + data rte.Data, ) { text, err := data.GetPropertyString(textDataTextField) if err != nil { @@ -60,8 +60,8 @@ func (p *interruptDetectorExtension) OnData( slog.Debug(fmt.Sprintf("OnData %s: %s %s: %t", textDataTextField, text, textDataFinalField, final), logTag) if final || len(text) >= 2 { - flushCmd, _ := rtego.NewCmd(cmdNameFlush) - rte.SendCmd(flushCmd, nil) + flushCmd, _ := rte.NewCmd(cmdNameFlush) + rteEnv.SendCmd(flushCmd, nil) slog.Info(fmt.Sprintf("sent cmd: %s", cmdNameFlush), logTag) } @@ -71,8 +71,8 @@ func init() { slog.Info("interrupt_detector extension init", logTag) // Register addon - rtego.RegisterAddonAsExtension( + rte.RegisterAddonAsExtension( "interrupt_detector", - rtego.NewDefaultExtensionAddon(newExtension), + rte.NewDefaultExtensionAddon(newExtension), ) } diff --git a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go index 6abdaa515..a4492f672 100644 --- a/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go +++ b/agents/addon/extension/openai_chatgpt/openai_chatgpt_extension.go @@ -19,7 +19,7 @@ import ( "sync/atomic" "time" - "agora.io/rte/rtego" + "agora.io/rte/rte" openai "github.com/sashabaranov/go-openai" ) @@ -28,7 +28,7 @@ var ( ) type openaiChatGPTExtension struct { - rtego.DefaultExtension + rte.DefaultExtension openaiChatGPT *openaiChatGPT } @@ -63,7 +63,7 @@ var ( wg sync.WaitGroup ) -func newChatGPTExtension(name string) rtego.Extension { +func newChatGPTExtension(name string) rte.Extension { return &openaiChatGPTExtension{} } @@ -80,13 +80,13 @@ func newChatGPTExtension(name string) rtego.Extension { // - max_tokens // - greeting // - proxy_url -func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { +func (p *openaiChatGPTExtension) OnStart(rteEnv rte.RteEnv) { slog.Info("OnStart", logTag) // prepare configuration openaiChatGPTConfig := defaultOpenaiChatGPTConfig() - if baseUrl, err := rte.GetPropertyString(propertyBaseUrl); err != nil { + if baseUrl, err := rteEnv.GetPropertyString(propertyBaseUrl); err != nil { slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyBaseUrl, err), logTag) } else { if len(baseUrl) > 0 { @@ -94,14 +94,14 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { } } - if apiKey, err := rte.GetPropertyString(propertyApiKey); err != nil { + if apiKey, err := rteEnv.GetPropertyString(propertyApiKey); err != nil { slog.Error(fmt.Sprintf("GetProperty required %s failed, err: %v", propertyApiKey, err), logTag) return } else { openaiChatGPTConfig.ApiKey = apiKey } - if model, err := rte.GetPropertyString(propertyModel); err != nil { + if model, err := rteEnv.GetPropertyString(propertyModel); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyModel, err), logTag) } else { if len(model) > 0 { @@ -109,7 +109,7 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { } } - if prompt, err := rte.GetPropertyString(propertyPrompt); err != nil { + if prompt, err := rteEnv.GetPropertyString(propertyPrompt); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s error:%v", propertyPrompt, err), logTag) } else { if len(prompt) > 0 { @@ -117,31 +117,31 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { } } - if frequencyPenalty, err := rte.GetPropertyFloat64(propertyFrequencyPenalty); err != nil { + if frequencyPenalty, err := 
rteEnv.GetPropertyFloat64(propertyFrequencyPenalty); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyFrequencyPenalty, err), logTag) } else { openaiChatGPTConfig.FrequencyPenalty = float32(frequencyPenalty) } - if presencePenalty, err := rte.GetPropertyFloat64(propertyPresencePenalty); err != nil { + if presencePenalty, err := rteEnv.GetPropertyFloat64(propertyPresencePenalty); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyPresencePenalty, err), logTag) } else { openaiChatGPTConfig.PresencePenalty = float32(presencePenalty) } - if temperature, err := rte.GetPropertyFloat64(propertyTemperature); err != nil { + if temperature, err := rteEnv.GetPropertyFloat64(propertyTemperature); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTemperature, err), logTag) } else { openaiChatGPTConfig.Temperature = float32(temperature) } - if topP, err := rte.GetPropertyFloat64(propertyTopP); err != nil { + if topP, err := rteEnv.GetPropertyFloat64(propertyTopP); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTopP, err), logTag) } else { openaiChatGPTConfig.TopP = float32(topP) } - if maxTokens, err := rte.GetPropertyInt64(propertyMaxTokens); err != nil { + if maxTokens, err := rteEnv.GetPropertyInt64(propertyMaxTokens); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxTokens, err), logTag) } else { if maxTokens > 0 { @@ -149,18 +149,18 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { } } - if proxyUrl, err := rte.GetPropertyString(propertyProxyUrl); err != nil { + if proxyUrl, err := rteEnv.GetPropertyString(propertyProxyUrl); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyProxyUrl, err), logTag) } else { openaiChatGPTConfig.ProxyUrl = proxyUrl } - greeting, err := rte.GetPropertyString(propertyGreeting) + greeting, err := rteEnv.GetPropertyString(propertyGreeting) if err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyGreeting, err), logTag) } - if propMaxMemoryLength, err := rte.GetPropertyInt64(propertyMaxMemoryLength); err != nil { + if propMaxMemoryLength, err := rteEnv.GetPropertyInt64(propertyMaxMemoryLength); err != nil { slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyMaxMemoryLength, err), logTag) } else { if propMaxMemoryLength > 0 { @@ -183,17 +183,17 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { // send greeting if available if len(greeting) > 0 { - outputData, _ := rtego.NewData("text_data") + outputData, _ := rte.NewData("text_data") outputData.SetProperty(dataOutTextDataPropertyText, greeting) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) - if err := rte.SendData(outputData); err != nil { + if err := rteEnv.SendData(outputData); err != nil { slog.Error(fmt.Sprintf("greeting [%s] send failed, err: %v", greeting, err), logTag) } else { slog.Info(fmt.Sprintf("greeting [%s] sent", greeting), logTag) } } - rte.OnStartDone() + rteEnv.OnStartDone() } // OnCmd receives cmd from rte graph. 
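The hunks above only swap the receiver type (rte rtego.Rte becomes rteEnv rte.RteEnv); the optional-property pattern itself is unchanged. A sketch, with cfg standing in for the local openaiChatGPTConfig value:

	if v, err := rteEnv.GetPropertyFloat64(propertyTopP); err != nil {
		slog.Warn(fmt.Sprintf("GetProperty optional %s failed, err: %v", propertyTopP, err), logTag)
	} else {
		cfg.TopP = float32(v)
	}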
@@ -202,13 +202,14 @@ func (p *openaiChatGPTExtension) OnStart(rte rtego.Rte) { // example: // {"name": "flush"} func (p *openaiChatGPTExtension) OnCmd( - rte rtego.Rte, - cmd rtego.Cmd, + rteEnv rte.RteEnv, + cmd rte.Cmd, ) { - cmdName, err := cmd.CmdName() + cmdName, err := cmd.GetName() if err != nil { slog.Error(fmt.Sprintf("OnCmd get name failed, err: %v", err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } slog.Info(fmt.Sprintf("OnCmd %s", cmdName), logTag) @@ -220,21 +221,24 @@ func (p *openaiChatGPTExtension) OnCmd( wg.Wait() // wait for chat completion stream to finish // send out - outCmd, err := rtego.NewCmd(cmdOutFlush) + outCmd, err := rte.NewCmd(cmdOutFlush) if err != nil { slog.Error(fmt.Sprintf("new cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } - if err := rte.SendCmd(outCmd, nil); err != nil { + if err := rteEnv.SendCmd(outCmd, nil); err != nil { slog.Error(fmt.Sprintf("send cmd %s failed, err: %v", cmdOutFlush, err), logTag) - rte.ReturnString(rtego.Error, "error", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Error) + rteEnv.ReturnResult(cmdResult, cmd) return } else { slog.Info(fmt.Sprintf("cmd %s sent", cmdOutFlush), logTag) } } - rte.ReturnString(rtego.Ok, "ok", cmd) + cmdResult, _ := rte.NewCmdResult(rte.Ok) + rteEnv.ReturnResult(cmdResult, cmd) } // OnData receives data from rte graph. @@ -243,8 +247,8 @@ func (p *openaiChatGPTExtension) OnCmd( // example: // {"name": "text_data", "properties": {"text": "hello", "is_final": true}} func (p *openaiChatGPTExtension) OnData( - rte rtego.Rte, - data rtego.Data, + rteEnv rte.RteEnv, + data rte.Data, ) { // Get isFinal isFinal, err := data.GetPropertyBool(dataInTextDataPropertyIsFinal) @@ -339,14 +343,14 @@ func (p *openaiChatGPTExtension) OnData( slog.Debug(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] got sentence: [%s]", inputText, sentence), logTag) // send sentence - outputData, err := rtego.NewData("text_data") + outputData, err := rte.NewData("text_data") if err != nil { slog.Error(fmt.Sprintf("NewData failed, err: %v", err), logTag) break } outputData.SetProperty(dataOutTextDataPropertyText, sentence) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, false) - if err := rte.SendData(outputData); err != nil { + if err := rteEnv.SendData(outputData); err != nil { slog.Error(fmt.Sprintf("GetChatCompletionsStream recv for input text: [%s] send sentence [%s] failed, err: %v", inputText, sentence, err), logTag) break } else { @@ -369,10 +373,10 @@ func (p *openaiChatGPTExtension) OnData( } // send end of segment - outputData, _ := rtego.NewData("text_data") + outputData, _ := rte.NewData("text_data") outputData.SetProperty(dataOutTextDataPropertyText, sentence) outputData.SetProperty(dataOutTextDataPropertyTextEndOfSegment, true) - if err := rte.SendData(outputData); err != nil { + if err := rteEnv.SendData(outputData); err != nil { slog.Error(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] send failed, err: %v", inputText, sentence, err), logTag) } else { slog.Info(fmt.Sprintf("GetChatCompletionsStream for input text: [%s] end of segment with sentence [%s] sent", inputText, sentence), logTag) } @@ -384,8 +388,8 @@ func init() { slog.Info("init") // Register addon - rtego.RegisterAddonAsExtension( + 
rte.RegisterAddonAsExtension( "openai_chatgpt", - rtego.NewDefaultExtensionAddon(newChatGPTExtension), + rte.NewDefaultExtensionAddon(newChatGPTExtension), ) } diff --git a/agents/main.go b/agents/main.go index 5a2fd1ef5..cf91296df 100644 --- a/agents/main.go +++ b/agents/main.go @@ -11,7 +11,7 @@ import ( "flag" "log" - "agora.io/rte/rtego" + "agora.io/rte/rte" ) type appConfig struct { @@ -19,26 +19,26 @@ type appConfig struct { } type defaultApp struct { - rtego.DefaultApp + rte.DefaultApp cfg *appConfig } func (p *defaultApp) OnInit( - rte rtego.Rte, - manifest rtego.MetadataInfo, - property rtego.MetadataInfo, + rteEnv rte.RteEnv, + manifest rte.MetadataInfo, + property rte.MetadataInfo, ) { // Using the default manifest.json if not specified. if len(p.cfg.Manifest) > 0 { - manifest.Set(rtego.MetadataTypeJSONFileName, p.cfg.Manifest) + manifest.Set(rte.MetadataTypeJSONFileName, p.cfg.Manifest) } - rte.OnInitDone(manifest, property) + rteEnv.OnInitDone(manifest, property) } func startAppBlocking(cfg *appConfig) { - appInstance, err := rtego.NewApp(&defaultApp{ + appInstance, err := rte.NewApp(&defaultApp{ cfg: cfg, }) if err != nil { @@ -47,9 +47,9 @@ func startAppBlocking(cfg *appConfig) { appInstance.Run(true) appInstance.Wait() - rtego.UnloadAllAddons() + rte.UnloadAllAddons() - rtego.EnsureCleanupWhenProcessExit() + rte.EnsureCleanupWhenProcessExit() } func setDefaultLog() { diff --git a/agents/manifest.json.en.example b/agents/manifest.json.en.example index 8494b15c1..496803f0e 100644 --- a/agents/manifest.json.en.example +++ b/agents/manifest.json.en.example @@ -2,22 +2,32 @@ "type": "app", "name": "astra_agents", "version": "0.2.0", - "language": "python", + "language": "go", "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, { "type": "extension_group", "name": "default_extension_group", - "version": "0.2.0" + "version": "0.3.1" }, { "type": "extension", "name": "agora_rtc", - "version": "0.2.1" - }, - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.0-rc1" } ], "predefined_graphs": [ diff --git a/agents/scripts/install_deps_and_build.sh b/agents/scripts/install_deps_and_build.sh index 9c0de1221..9965ac882 100755 --- a/agents/scripts/install_deps_and_build.sh +++ b/agents/scripts/install_deps_and_build.sh @@ -43,6 +43,23 @@ build_cxx_addon() { done } +install_python_requirements() { + local app_dir=$1 + + if [[ -f "requirements.txt" ]]; then + pip install -r requirements.txt + fi + + # traverse the addon/extension directory to find the requirements.txt + if [[ -d "addon/extension" ]]; then + for extension in addon/extension/*; do + if [[ -f "$extension/requirements.txt" ]]; then + pip install -r $extension/requirements.txt + fi + done + fi +} + build_go_app() { local app_dir=$1 cd $app_dir @@ -84,8 +101,12 @@ main() { arpm install # build addons and app + echo "build_cxx_addon..." build_cxx_addon $APP_HOME - # build_go_app $APP_HOME + echo "build_go_app..." + build_go_app $APP_HOME + echo "install_python_requirements..." 
+ install_python_requirements $APP_HOME } main "$@" From c6e16a34d2484d9b54e10b5a6165414106d63c91 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 15:48:02 +0000 Subject: [PATCH 56/72] fix: upgrade runtime dependencies --- agents/addon/extension/chat_transcriber/manifest.json | 7 +------ agents/addon/extension/elevenlabs_tts/manifest.json | 9 ++------- agents/addon/extension/interrupt_detector/manifest.json | 7 +------ agents/addon/extension/openai_chatgpt/manifest.json | 7 +------ 4 files changed, 5 insertions(+), 25 deletions(-) diff --git a/agents/addon/extension/chat_transcriber/manifest.json b/agents/addon/extension/chat_transcriber/manifest.json index 269a5a534..8d3470fd4 100644 --- a/agents/addon/extension/chat_transcriber/manifest.json +++ b/agents/addon/extension/chat_transcriber/manifest.json @@ -4,15 +4,10 @@ "version": "0.1.0", "language": "go", "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, { "type": "system", "name": "rte_runtime_go", - "version": "0.1.0" + "version": "0.3.1" } ], "api": { diff --git a/agents/addon/extension/elevenlabs_tts/manifest.json b/agents/addon/extension/elevenlabs_tts/manifest.json index 620fb2248..063c2e845 100644 --- a/agents/addon/extension/elevenlabs_tts/manifest.json +++ b/agents/addon/extension/elevenlabs_tts/manifest.json @@ -4,15 +4,10 @@ "version": "0.1.0", "language": "go", "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, { "type": "system", "name": "rte_runtime_go", - "version": "0.1.0" + "version": "0.3.1" } ], "api": { @@ -71,4 +66,4 @@ } ] } -} +} \ No newline at end of file diff --git a/agents/addon/extension/interrupt_detector/manifest.json b/agents/addon/extension/interrupt_detector/manifest.json index 05781cf5a..2a29ba1fa 100644 --- a/agents/addon/extension/interrupt_detector/manifest.json +++ b/agents/addon/extension/interrupt_detector/manifest.json @@ -4,15 +4,10 @@ "version": "0.1.0", "language": "go", "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, { "type": "system", "name": "rte_runtime_go", - "version": "0.1.0" + "version": "0.3.1" } ], "api": { diff --git a/agents/addon/extension/openai_chatgpt/manifest.json b/agents/addon/extension/openai_chatgpt/manifest.json index aa4f6d50d..a2fc4f61c 100644 --- a/agents/addon/extension/openai_chatgpt/manifest.json +++ b/agents/addon/extension/openai_chatgpt/manifest.json @@ -5,15 +5,10 @@ "language": "go", "support": [], "dependencies": [ - { - "type": "system", - "name": "rte_runtime", - "version": "0.1.0" - }, { "type": "system", "name": "rte_runtime_go", - "version": "0.1.0" + "version": "0.3.1" } ], "api": { From 1be42ae3973f4912fe0b3ac346e36cfddbd048c4 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 15:50:17 +0000 Subject: [PATCH 57/72] fix: compatible with 0.3.x runtime api --- .../extension/bedrock_llm_python/__init__.py | 2 +- .../bedrock_llm_extension.py | 210 ++++++++++++------ .../bedrock_llm_python/manifest.json | 16 +- .../bedrock_llm_python/requirements.txt | 2 +- .../chat_transcriber_addon.py | 10 +- .../chat_transcriber_extension.py | 18 +- .../chat_transcriber_python/manifest.json | 6 +- .../extension/cosy_tts/cosy_tts_addon.py | 10 +- .../extension/cosy_tts/cosy_tts_extension.py | 20 +- agents/addon/extension/cosy_tts/manifest.json | 6 +- .../elevenlabs_tts_addon.py | 10 +- .../elevenlabs_tts_extension.py | 26 ++- .../elevenlabs_tts_python/manifest.json | 4 +- .../extension/elevenlabs_tts_python/pcm.py | 4 
+- .../interrupt_detector_addon.py | 10 +- .../interrupt_detector_extension.py | 18 +- .../interrupt_detector_python/manifest.json | 2 +- .../openai_chatgpt_python/manifest.json | 2 +- .../openai_chatgpt_addon.py | 10 +- .../openai_chatgpt_extension.py | 153 +++++++++---- .../openai_chatgpt_python/requirements.txt | 2 +- .../extension/qwen_llm_python/manifest.json | 52 ++--- .../qwen_llm_python/qwen_llm_addon.py | 10 +- .../qwen_llm_python/qwen_llm_extension.py | 26 +-- 24 files changed, 380 insertions(+), 249 deletions(-) diff --git a/agents/addon/extension/bedrock_llm_python/__init__.py b/agents/addon/extension/bedrock_llm_python/__init__.py index af911d6c6..4e39b9dca 100644 --- a/agents/addon/extension/bedrock_llm_python/__init__.py +++ b/agents/addon/extension/bedrock_llm_python/__init__.py @@ -1,4 +1,4 @@ from . import bedrock_llm_extension from .log import logger -logger.info("bedrock_llm_extension extension loaded") +logger.info("bedrock_llm_python extension loaded") diff --git a/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py index 43153cd8d..e8dd64720 100644 --- a/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py +++ b/agents/addon/extension/bedrock_llm_python/bedrock_llm_extension.py @@ -1,20 +1,17 @@ from .bedrock_llm import BedrockLLM, BedrockLLMConfig from datetime import datetime from threading import Thread -from rte_runtime_python import ( +from rte import ( Addon, Extension, register_addon_as_extension, - Rte, + RteEnv, Cmd, Data, StatusCode, CmdResult, MetadataInfo, - RTE_PIXEL_FMT, ) -from rte_runtime_python.image_frame import ImageFrame -from PIL import Image, ImageFilter from .log import logger @@ -36,6 +33,7 @@ PROPERTY_GREETING = "greeting" # Optional PROPERTY_MAX_MEMORY_LENGTH = "max_memory_length" # Optional + def get_current_time(): # Get the current time start_time = datetime.now() @@ -43,11 +41,13 @@ def get_current_time(): unix_microseconds = int(start_time.timestamp() * 1_000_000) return unix_microseconds + def is_punctuation(char): - if char in [',', ',', '.', '。', '?', '?', '!', '!']: + if char in [",", ",", ".", "。", "?", "?", "!", "!"]: return True return False + def parse_sentence(sentence, content): remain = "" found_punc = False @@ -63,17 +63,20 @@ def parse_sentence(sentence, content): return sentence, remain, found_punc + class BedrockLLMExtension(Extension): memory = [] max_memory_length = 10 outdate_ts = 0 bedrock_llm = None - def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: logger.info("BedrockLLMExtension on_init") rte.on_init_done(manifest, property) - def on_start(self, rte: Rte) -> None: + def on_start(self, rte: RteEnv) -> None: logger.info("BedrockLLMExtension on_start") # Prepare configuration bedrock_llm_config = BedrockLLMConfig.default_config() @@ -83,21 +86,27 @@ def on_start(self, rte: Rte) -> None: if region: bedrock_llm_config.region = region except Exception as err: - logger.debug(f"GetProperty optional {PROPERTY_REGION} failed, err: {err}. Using default value: {bedrock_llm_config.region}") + logger.debug( + f"GetProperty optional {PROPERTY_REGION} failed, err: {err}. 
Using default value: {bedrock_llm_config.region}" + ) return try: access_key = rte.get_property_string(PROPERTY_ACCESS_KEY) bedrock_llm_config.access_key = access_key except Exception as err: - logger.error(f"GetProperty optional {PROPERTY_ACCESS_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.access_key}") + logger.error( + f"GetProperty optional {PROPERTY_ACCESS_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.access_key}" + ) return try: secret_key = rte.get_property_string(PROPERTY_SECRET_KEY) bedrock_llm_config.secret_key = secret_key except Exception as err: - logger.error(f"GetProperty optional {PROPERTY_SECRET_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.secret_key}") + logger.error( + f"GetProperty optional {PROPERTY_SECRET_KEY} failed, err: {err}. Using default value: {bedrock_llm_config.secret_key}" + ) return try: @@ -105,50 +114,66 @@ def on_start(self, rte: Rte) -> None: if model: bedrock_llm_config.model = model except Exception as err: - logger.debug(f"GetProperty optional {PROPERTY_MODEL} error: {err}. Using default value: {bedrock_llm_config.model}") + logger.debug( + f"GetProperty optional {PROPERTY_MODEL} error: {err}. Using default value: {bedrock_llm_config.model}" + ) try: prompt = rte.get_property_string(PROPERTY_PROMPT) if prompt: bedrock_llm_config.prompt = prompt except Exception as err: - logger.debug(f"GetProperty optional {PROPERTY_PROMPT} error: {err}. Using default value: {bedrock_llm_config.prompt}") + logger.debug( + f"GetProperty optional {PROPERTY_PROMPT} error: {err}. Using default value: {bedrock_llm_config.prompt}" + ) try: temperature = rte.get_property_float(PROPERTY_TEMPERATURE) bedrock_llm_config.temperature = float(temperature) except Exception as err: - logger.debug(f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}. Using default value: {bedrock_llm_config.temperature}") + logger.debug( + f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}. Using default value: {bedrock_llm_config.temperature}" + ) try: top_p = rte.get_property_float(PROPERTY_TOP_P) bedrock_llm_config.top_p = float(top_p) except Exception as err: - logger.debug(f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}. Using default value: {bedrock_llm_config.top_p}") + logger.debug( + f"GetProperty optional {PROPERTY_TOP_P} failed, err: {err}. Using default value: {bedrock_llm_config.top_p}" + ) try: max_tokens = rte.get_property_int(PROPERTY_MAX_TOKENS) if max_tokens > 0: bedrock_llm_config.max_tokens = int(max_tokens) except Exception as err: - logger.debug(f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}. Using default value: {bedrock_llm_config.max_tokens}") + logger.debug( + f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}. Using default value: {bedrock_llm_config.max_tokens}" + ) try: greeting = rte.get_property_string(PROPERTY_GREETING) except Exception as err: - logger.debug(f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}.") + logger.debug( + f"GetProperty optional {PROPERTY_GREETING} failed, err: {err}." + ) try: prop_max_memory_length = rte.get_property_int(PROPERTY_MAX_MEMORY_LENGTH) if prop_max_memory_length > 0: self.max_memory_length = int(prop_max_memory_length) except Exception as err: - logger.debug(f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}.") + logger.debug( + f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}." 
+ ) # Create bedrockLLM instance try: self.bedrock_llm = BedrockLLM(bedrock_llm_config) - logger.info(f"newBedrockLLM succeed with max_tokens: {bedrock_llm_config.max_tokens}, model: {bedrock_llm_config.model}") + logger.info( + f"newBedrockLLM succeed with max_tokens: {bedrock_llm_config.max_tokens}, model: {bedrock_llm_config.model}" + ) except Exception as err: logger.info(f"newBedrockLLM failed, err: {err}") @@ -156,23 +181,27 @@ def on_start(self, rte: Rte) -> None: if greeting: try: output_data = Data.create("text_data") - output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) - output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True + ) rte.send_data(output_data) logger.info(f"greeting [{greeting}] sent") except Exception as err: logger.info(f"greeting [{greeting}] send failed, err: {err}") rte.on_start_done() - def on_stop(self, rte: Rte) -> None: + def on_stop(self, rte: RteEnv) -> None: logger.info("BedrockLLMExtension on_stop") rte.on_stop_done() - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("BedrockLLMExtension on_deinit") rte.on_deinit_done() - def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("BedrockLLMExtension on_cmd") cmd_json = cmd.to_json() logger.info("BedrockLLMExtension on_cmd json: " + cmd_json) @@ -195,10 +224,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_result.set_property_string("detail", "success") rte.return_result(cmd_result, cmd) - def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: - logger.info("BedrockLLMExtension on_cmd") - - def on_data(self, rte: Rte, data: Data) -> None: + def on_data(self, rte: RteEnv, data: Data) -> None: """ on_data receives data from rte graph. current supported data: @@ -215,7 +241,9 @@ def on_data(self, rte: Rte, data: Data) -> None: logger.info("ignore non-final input") return except Exception as err: - logger.info(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") + logger.info( + f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" + ) return # Get input text @@ -226,58 +254,72 @@ def on_data(self, rte: Rte, data: Data) -> None: return logger.info(f"OnData input text: [{input_text}]") except Exception as err: - logger.info(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") + logger.info( + f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" + ) return # Prepare memory. 
A conversation must alternate between user and assistant roles while len(self.memory): if len(self.memory) > self.max_memory_length: - logger.debug(f'pop out first message, reason: memory length limit: `{self.memory[0]}`') + logger.debug( + f"pop out first message, reason: memory length limit: `{self.memory[0]}`" + ) self.memory.pop(0) - elif self.memory[0]['role'] == 'assistant': - logger.debug(f'pop out first message, reason: messages can not start with assistant: `{self.memory[0]}`') + elif self.memory[0]["role"] == "assistant": + logger.debug( + f"pop out first message, reason: messages can not start with assistant: `{self.memory[0]}`" + ) self.memory.pop(0) else: break - if len(self.memory) and self.memory[-1]['role'] == 'user': + if len(self.memory) and self.memory[-1]["role"] == "user": # if last user input got empty response, append current user input. - logger.debug(f'found last message with role `user`, will append this input into last user input') - self.memory[-1]['content'].append( - {'text': input_text} + logger.debug( + f"found last message with role `user`, will append this input into last user input" ) + self.memory[-1]["content"].append({"text": input_text}) else: - self.memory.append({"role": "user", "content": [ - {'text': input_text} - ]}) + self.memory.append({"role": "user", "content": [{"text": input_text}]}) def converse_stream_worker(start_time, input_text, memory): try: - logger.info(f"GetConverseStream for input text: [{input_text}] memory: {memory}") + logger.info( + f"GetConverseStream for input text: [{input_text}] memory: {memory}" + ) # Get result from Bedrock resp = self.bedrock_llm.get_converse_stream(memory) - if resp is None or resp.get('stream') is None: - logger.info(f"GetConverseStream for input text: [{input_text}] failed") + if resp is None or resp.get("stream") is None: + logger.info( + f"GetConverseStream for input text: [{input_text}] failed" + ) return - stream = resp.get('stream') + stream = resp.get("stream") sentence = "" full_content = "" first_sentence_sent = False for event in stream: if start_time < self.outdate_ts: - logger.info(f"GetConverseStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}") + logger.info( + f"GetConverseStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}" + ) break - if 'contentBlockDelta' in event: - delta_types = event['contentBlockDelta']['delta'].keys() + if "contentBlockDelta" in event: + delta_types = event["contentBlockDelta"]["delta"].keys() # ignore other types of content: e.g toolUse - if 'text' in delta_types: - content = event['contentBlockDelta']['delta']['text'] - elif 'internalServerException' in event or 'modelStreamErrorException' in event \ - or 'throttlingException' in event or 'validationException' in event: + if "text" in delta_types: + content = event["contentBlockDelta"]["delta"]["text"] + elif ( + "internalServerException" in event + or "modelStreamErrorException" in event + or "throttlingException" in event + or "validationException" in event + ): logger.error(f"GetConverseStream Error occurred: {event}") break else: @@ -287,68 +329,98 @@ def converse_stream_worker(start_time, input_text, memory): full_content += content while True: - sentence, content, sentence_is_final = parse_sentence(sentence, content) + sentence, content, sentence_is_final = parse_sentence( + sentence, content + ) if len(sentence) == 0 or not sentence_is_final: logger.info(f"sentence
{sentence} is empty or not final") break - logger.info(f"GetConverseStream recv for input text: [{input_text}] got sentence: [{sentence}]") + logger.info( + f"GetConverseStream recv for input text: [{input_text}] got sentence: [{sentence}]" + ) # send sentence try: output_data = Data.create("text_data") - output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) - output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False) + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False + ) rte.send_data(output_data) - logger.info(f"GetConverseStream recv for input text: [{input_text}] sent sentence [{sentence}]") + logger.info( + f"GetConverseStream recv for input text: [{input_text}] sent sentence [{sentence}]" + ) except Exception as err: - logger.info(f"GetConverseStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}") + logger.info( + f"GetConverseStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}" + ) break sentence = "" if not first_sentence_sent: first_sentence_sent = True - logger.info(f"GetConverseStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms") + logger.info( + f"GetConverseStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms" + ) if len(full_content.strip()): # remember response as assistant content in memory - memory.append({"role": "assistant", "content": [{"text": full_content}]}) + memory.append( + {"role": "assistant", "content": [{"text": full_content}]} + ) else: # can not put empty model response into memory - logger.error(f"GetConverseStream recv for input text: [{input_text}] failed: empty response [{full_content}]") + logger.error( + f"GetConverseStream recv for input text: [{input_text}] failed: empty response [{full_content}]" + ) return # send end of segment try: output_data = Data.create("text_data") - output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) - output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True + ) rte.send_data(output_data) - logger.info(f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent") + logger.info( + f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent" + ) except Exception as err: - logger.info(f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}") + logger.info( + f"GetConverseStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}" + ) except Exception as e: - logger.info(f"GetConverseStream for input text: [{input_text}] failed, err: {e}") + logger.info( + f"GetConverseStream for input text: [{input_text}] failed, err: {e}" + ) # Start thread to request and read responses from Bedrock start_time = get_current_time() - thread = Thread(target=converse_stream_worker, args=(start_time, input_text, self.memory)) + thread = Thread( + target=converse_stream_worker, args=(start_time, input_text, self.memory) + ) thread.start()
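        # A rough sketch of the interrupt design above (names are taken from
        # this file; anything beyond them is assumed): on_data returns as soon
        # as the worker thread starts, so the rte graph is never blocked. A
        # later "flush" cmd is expected to advance self.outdate_ts, and the
        # worker compares its captured start_time against it on every stream
        # event:
        #
        #   start_time = get_current_time()   # microseconds, fixed per request
        #   ...
        #   if start_time < self.outdate_ts:  # a flush arrived after this call
        #       break                         # drop the now-stale stream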
logger.info(f"BedrockLLMExtension on_data end") @register_addon_as_extension("bedrock_llm_python") class BedrockLLMExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("BedrockLLMExtensionAddon on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") rte.on_create_instance_done(BedrockLLMExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("BedrockLLMExtensionAddon on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/bedrock_llm_python/manifest.json b/agents/addon/extension/bedrock_llm_python/manifest.json index bd876e4fd..7f9125813 100644 --- a/agents/addon/extension/bedrock_llm_python/manifest.json +++ b/agents/addon/extension/bedrock_llm_python/manifest.json @@ -7,31 +7,31 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.1" } ], "api": { "property": { "access_key": { - "type": "string" + "type": "string" }, "secret_key": { - "type": "string" + "type": "string" }, "model": { - "type": "string" + "type": "string" }, "max_tokens": { - "type": "int64" + "type": "int64" }, "prompt": { - "type": "string" + "type": "string" }, "greeting": { - "type": "string" + "type": "string" }, "max_memory_length": { - "type": "int64" + "type": "int64" } }, "data_in": [ diff --git a/agents/addon/extension/bedrock_llm_python/requirements.txt b/agents/addon/extension/bedrock_llm_python/requirements.txt index 40ddd4741..3f1da6f45 100644 --- a/agents/addon/extension/bedrock_llm_python/requirements.txt +++ b/agents/addon/extension/bedrock_llm_python/requirements.txt @@ -1,4 +1,4 @@ -pillow==10.4.0 +# pillow==10.4.0 # openai==1.35.13 # requests==2.32.3 boto3==1.34.143 \ No newline at end of file diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py index 8844c12e5..9e1412651 100644 --- a/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_addon.py @@ -1,26 +1,26 @@ -from rte_runtime_python import ( +from rte import ( Addon, register_addon_as_extension, - Rte, + RteEnv, ) from .log import logger @register_addon_as_extension("chat_transcriber_python") class ChatTranscriberExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") from .chat_transcriber_extension import ChatTranscriberExtension rte.on_create_instance_done(ChatTranscriberExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py index f30e026b0..66fccda18 100644 --- 
a/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py +++ b/agents/addon/extension/chat_transcriber_python/chat_transcriber_extension.py @@ -7,9 +7,9 @@ # import json -from rte_runtime_python import ( +from rte import ( Extension, - Rte, + RteEnv, Cmd, Data, StatusCode, @@ -32,23 +32,25 @@ class ChatTranscriberExtension(Extension): - def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: logger.info("on_init") rte.on_init_done(manifest, property) - def on_start(self, rte: Rte) -> None: + def on_start(self, rte: RteEnv) -> None: logger.info("on_start") rte.on_start_done() - def on_stop(self, rte: Rte) -> None: + def on_stop(self, rte: RteEnv) -> None: logger.info("on_stop") rte.on_stop_done() - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("on_deinit") rte.on_deinit_done() - def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("on_cmd") cmd_json = cmd.to_json() logger.info("on_cmd json: {}".format(cmd_json)) @@ -57,7 +59,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_result.set_property_string("detail", "success") rte.return_result(cmd_result, cmd) - def on_data(self, rte: Rte, data: Data) -> None: + def on_data(self, rte: RteEnv, data: Data) -> None: """ on_data receives data from rte graph. current supported data: diff --git a/agents/addon/extension/chat_transcriber_python/manifest.json b/agents/addon/extension/chat_transcriber_python/manifest.json index c171b4466..f64295b64 100644 --- a/agents/addon/extension/chat_transcriber_python/manifest.json +++ b/agents/addon/extension/chat_transcriber_python/manifest.json @@ -5,9 +5,9 @@ "language": "python", "dependencies": [ { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" } ], "api": { diff --git a/agents/addon/extension/cosy_tts/cosy_tts_addon.py b/agents/addon/extension/cosy_tts/cosy_tts_addon.py index 0cdc96488..b9135b3e5 100644 --- a/agents/addon/extension/cosy_tts/cosy_tts_addon.py +++ b/agents/addon/extension/cosy_tts/cosy_tts_addon.py @@ -1,26 +1,26 @@ -from rte_runtime_python import ( +from rte import ( Addon, register_addon_as_extension, - Rte, + RteEnv, ) from .log import logger @register_addon_as_extension("cosy_tts") class CosyTTSExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("CosyTTSExtensionAddon on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") from .cosy_tts_extension import CosyTTSExtension rte.on_create_instance_done(CosyTTSExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("CosyTTSExtensionAddon on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/cosy_tts/cosy_tts_extension.py b/agents/addon/extension/cosy_tts/cosy_tts_extension.py index 868d62014..79ecfcd8c 100644 --- a/agents/addon/extension/cosy_tts/cosy_tts_extension.py +++ b/agents/addon/extension/cosy_tts/cosy_tts_extension.py @@ -6,9 +6,9 @@ # # import traceback -from rte_runtime_python import ( +from rte import ( 
Extension, - Rte, + RteEnv, Cmd, PcmFrame, RTE_PCM_FRAME_DATA_FMT, @@ -30,7 +30,7 @@ class CosyTTSCallback(ResultCallback): _player = None _stream = None - def __init__(self, rte: Rte, sample_rate: int): + def __init__(self, rte: RteEnv, sample_rate: int): super().__init__() self.rte = rte self.sample_rate = sample_rate @@ -116,11 +116,11 @@ def __init__(self, name: str): self.queue = queue.Queue() self.mutex = threading.Lock() - def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + def on_init(self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo) -> None: logger.info("CosyTTSExtension on_init") rte.on_init_done(manifest, property) - def on_start(self, rte: Rte) -> None: + def on_start(self, rte: RteEnv) -> None: logger.info("CosyTTSExtension on_start") self.api_key = rte.get_property_string("api_key") self.voice = rte.get_property_string("voice") @@ -151,7 +151,7 @@ def on_start(self, rte: Rte) -> None: self.thread.start() rte.on_start_done() - def on_stop(self, rte: Rte) -> None: + def on_stop(self, rte: RteEnv) -> None: logger.info("CosyTTSExtension on_stop") self.stopped = True @@ -160,14 +160,14 @@ def on_stop(self, rte: Rte) -> None: self.thread.join() rte.on_stop_done() - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("CosyTTSExtension on_deinit") rte.on_deinit_done() def need_interrupt(self, ts: datetime.time) -> bool: return self.outdateTs > ts and (self.outdateTs - ts).total_seconds() > 1 - def async_handle(self, rte: Rte): + def async_handle(self, rte: RteEnv): try: tts = None callback = None @@ -221,7 +221,7 @@ def flush(self): self.queue.get() self.queue.put(("", datetime.now())) - def on_data(self, rte: Rte, data: Data) -> None: + def on_data(self, rte: RteEnv, data: Data) -> None: logger.info("CosyTTSExtension on_data") inputText = data.get_property_string("text") if len(inputText) == 0: @@ -233,7 +233,7 @@ def on_data(self, rte: Rte, data: Data) -> None: logger.info("on data %s %d", inputText, is_end) self.queue.put((inputText, datetime.now())) - def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("CosyTTSExtension on_cmd") cmd_json = cmd.to_json() logger.info("CosyTTSExtension on_cmd json: %s", cmd_json) diff --git a/agents/addon/extension/cosy_tts/manifest.json b/agents/addon/extension/cosy_tts/manifest.json index 09067fe81..8690db031 100644 --- a/agents/addon/extension/cosy_tts/manifest.json +++ b/agents/addon/extension/cosy_tts/manifest.json @@ -5,9 +5,9 @@ "language": "python", "dependencies": [ { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" } ], "api": { diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py index 2b6a93efb..90d2bd4aa 100644 --- a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_addon.py @@ -6,28 +6,28 @@ # # -from rte_runtime_python import ( +from rte import ( Addon, register_addon_as_extension, - Rte, + RteEnv, ) from .log import logger @register_addon_as_extension("elevenlabs_tts_python") class ElevenlabsTTSExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("on_init") rte.on_init_done(manifest, property) return - def
on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") from .elevenlabs_tts_extension import ElevenlabsTTSExtension rte.on_create_instance_done(ElevenlabsTTSExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py index 1c2799624..803ed5c24 100644 --- a/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py +++ b/agents/addon/extension/elevenlabs_tts_python/elevenlabs_tts_extension.py @@ -10,11 +10,11 @@ import threading import time -from rte_runtime_python import ( +from rte import ( Addon, Extension, register_addon_as_extension, - Rte, + RteEnv, Cmd, CmdResult, StatusCode, @@ -47,7 +47,9 @@ def __init__(self, text: str, received_ts: int) -> None: class ElevenlabsTTSExtension(Extension): - def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: logger.info("on_init") self.elevenlabs_tts = None @@ -58,7 +60,7 @@ def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> N rte.on_init_done(manifest, property) - def on_start(self, rte: Rte) -> None: + def on_start(self, rte: RteEnv) -> None: logger.info("on_start") # prepare configuration @@ -150,15 +152,15 @@ def on_start(self, rte: Rte) -> None: rte.on_start_done() - def on_stop(self, rte: Rte) -> None: + def on_stop(self, rte: RteEnv) -> None: logger.info("on_stop") rte.on_stop_done() - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("on_deinit") rte.on_deinit_done() - def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: """ on_cmd receives cmd from rte graph. current supported cmd: @@ -182,7 +184,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_result.set_property_string("detail", "success") rte.return_result(cmd_result, cmd) - def on_data(self, rte: Rte, data: Data) -> None: + def on_data(self, rte: RteEnv, data: Data) -> None: """ on_data receives data from rte graph. 
current supported data: @@ -208,7 +210,7 @@ def on_data(self, rte: Rte, data: Data) -> None: self.text_queue.put(Message(text, int(time.time() * 1000000))) - def process_text_queue(self, rte: Rte): + def process_text_queue(self, rte: RteEnv): logger.info("process_text_queue") while True: @@ -281,16 +283,16 @@ def process_text_queue(self, rte: Rte): @register_addon_as_extension("elevenlabs_tts_python") class ElevenlabsTTSExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") rte.on_create_instance_done(ElevenlabsTTSExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/elevenlabs_tts_python/manifest.json b/agents/addon/extension/elevenlabs_tts_python/manifest.json index 35b49e5ca..7cf5c0619 100644 --- a/agents/addon/extension/elevenlabs_tts_python/manifest.json +++ b/agents/addon/extension/elevenlabs_tts_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.1" } ], "api": { @@ -66,4 +66,4 @@ } ] } -} +} \ No newline at end of file diff --git a/agents/addon/extension/elevenlabs_tts_python/pcm.py b/agents/addon/extension/elevenlabs_tts_python/pcm.py index eeb33c015..8f73c87b4 100644 --- a/agents/addon/extension/elevenlabs_tts_python/pcm.py +++ b/agents/addon/extension/elevenlabs_tts_python/pcm.py @@ -8,7 +8,7 @@ import logging from typing import Iterator -from rte_runtime_python import PcmFrame, Rte, RTE_PCM_FRAME_DATA_FMT +from rte import PcmFrame, RteEnv, RTE_PCM_FRAME_DATA_FMT class Pcm: @@ -56,7 +56,7 @@ def read_pcm_stream( if chunk: yield chunk - def send(self, rte: Rte, buf: memoryview) -> None: + def send(self, rte: RteEnv, buf: memoryview) -> None: try: frame = self.get_pcm_frame(buf) rte.send_pcm_frame(frame) diff --git a/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py index accab6920..407d7816f 100644 --- a/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_addon.py @@ -6,29 +6,29 @@ # # -from rte_runtime_python import ( +from rte import ( Addon, register_addon_as_extension, - Rte, + RteEnv, ) from .log import logger @register_addon_as_extension("interrupt_detector_python") class InterruptDetectorExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") from .interrupt_detector_extension import InterruptDetectorExtension rte.on_create_instance_done(InterruptDetectorExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("on_deinit") rte.on_deinit_done() return diff --git 
a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py index dcc220f35..510b4e1e5 100644 --- a/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py +++ b/agents/addon/extension/interrupt_detector_python/interrupt_detector_extension.py @@ -6,9 +6,9 @@ # # -from rte_runtime_python import ( +from rte import ( Extension, - Rte, + RteEnv, Cmd, Data, StatusCode, @@ -25,23 +25,25 @@ class InterruptDetectorExtension(Extension): - def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: logger.info("on_init") rte.on_init_done(manifest, property) - def on_start(self, rte: Rte) -> None: + def on_start(self, rte: RteEnv) -> None: logger.info("on_start") rte.on_start_done() - def on_stop(self, rte: Rte) -> None: + def on_stop(self, rte: RteEnv) -> None: logger.info("on_stop") rte.on_stop_done() - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("on_deinit") rte.on_deinit_done() - def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("on_cmd") cmd_json = cmd.to_json() logger.info("on_cmd json: " + cmd_json) @@ -50,7 +52,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_result.set_property_string("detail", "success") rte.return_result(cmd_result, cmd) - def on_data(self, rte: Rte, data: Data) -> None: + def on_data(self, rte: RteEnv, data: Data) -> None: """ on_data receives data from rte graph. current supported data: diff --git a/agents/addon/extension/interrupt_detector_python/manifest.json b/agents/addon/extension/interrupt_detector_python/manifest.json index cc261c7c4..692aea026 100644 --- a/agents/addon/extension/interrupt_detector_python/manifest.json +++ b/agents/addon/extension/interrupt_detector_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.1" } ], "api": { diff --git a/agents/addon/extension/openai_chatgpt_python/manifest.json b/agents/addon/extension/openai_chatgpt_python/manifest.json index 290b16299..9fc1b3626 100644 --- a/agents/addon/extension/openai_chatgpt_python/manifest.json +++ b/agents/addon/extension/openai_chatgpt_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.1" } ], "api": { diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py index 7f2c039a2..537eda5bb 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_addon.py @@ -5,29 +5,29 @@ # Copyright (c) 2024 Agora IO. All rights reserved.
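# A minimal sketch of the addon lifecycle this file implements (the call
# order is inferred from the callbacks below, not from runtime docs, so
# treat it as an assumption): the runtime resolves the name passed to
# @register_addon_as_extension and then drives:
#
#   addon.on_init(rte, manifest, property)     # must call rte.on_init_done(...)
#   addon.on_create_instance(rte, name, ctx)   # must call
#       rte.on_create_instance_done(OpenAIChatGPTExtension(name), ctx)
#   addon.on_deinit(rte)                       # must call rte.on_deinit_done()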
# # -from rte_runtime_python import ( +from rte import ( Addon, register_addon_as_extension, - Rte, + RteEnv, ) from .log import logger @register_addon_as_extension("openai_chatgpt_python") class OpenAIChatGPTExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("OpenAIChatGPTExtensionAddon on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") from .openai_chatgpt_extension import OpenAIChatGPTExtension rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("OpenAIChatGPTExtensionAddon on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py index 75ce1c7a8..6cabe6cbc 100644 --- a/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py +++ b/agents/addon/extension/openai_chatgpt_python/openai_chatgpt_extension.py @@ -8,20 +8,17 @@ from .openai_chatgpt import OpenAIChatGPT, OpenAIChatGPTConfig from datetime import datetime from threading import Thread -from rte_runtime_python import ( +from rte import ( Addon, Extension, register_addon_as_extension, - Rte, + RteEnv, Cmd, Data, StatusCode, CmdResult, MetadataInfo, - RTE_PIXEL_FMT, ) -from rte_runtime_python.image_frame import ImageFrame -from PIL import Image, ImageFilter from .log import logger @@ -45,6 +42,7 @@ PROPERTY_PROXY_URL = "proxy_url" # Optional PROPERTY_MAX_MEMORY_LENGTH = "max_memory_length" # Optional + def get_current_time(): # Get the current time start_time = datetime.now() @@ -52,11 +50,13 @@ def get_current_time(): unix_microseconds = int(start_time.timestamp() * 1_000_000) return unix_microseconds + def is_punctuation(char): - if char in [',', ',', '.', '。', '?', '?', '!', '!']: + if char in [",", ",", ".", "。", "?", "?", "!", "!"]: return True return False + def parse_sentence(sentence, content): remain = "" found_punc = False @@ -72,17 +72,20 @@ def parse_sentence(sentence, content): return sentence, remain, found_punc + class OpenAIChatGPTExtension(Extension): memory = [] max_memory_length = 10 outdate_ts = 0 openai_chatgpt = None - def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: logger.info("OpenAIChatGPTExtension on_init") rte.on_init_done(manifest, property) - def on_start(self, rte: Rte) -> None: + def on_start(self, rte: RteEnv) -> None: logger.info("OpenAIChatGPTExtension on_start") # Prepare configuration openai_chatgpt_config = OpenAIChatGPTConfig.default_config() @@ -119,19 +122,25 @@ def on_start(self, rte: Rte) -> None: frequency_penalty = rte.get_property_float(PROPERTY_FREQUENCY_PENALTY) openai_chatgpt_config.frequency_penalty = float(frequency_penalty) except Exception as err: - logger.info(f"GetProperty optional {PROPERTY_FREQUENCY_PENALTY} failed, err: {err}") + logger.info( + f"GetProperty optional {PROPERTY_FREQUENCY_PENALTY} failed, err: {err}" + ) try: presence_penalty = rte.get_property_float(PROPERTY_PRESENCE_PENALTY) openai_chatgpt_config.presence_penalty = float(presence_penalty) except Exception as err: - 
logger.info(f"GetProperty optional {PROPERTY_PRESENCE_PENALTY} failed, err: {err}") + logger.info( + f"GetProperty optional {PROPERTY_PRESENCE_PENALTY} failed, err: {err}" + ) try: temperature = rte.get_property_float(PROPERTY_TEMPERATURE) openai_chatgpt_config.temperature = float(temperature) except Exception as err: - logger.info(f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}") + logger.info( + f"GetProperty optional {PROPERTY_TEMPERATURE} failed, err: {err}" + ) try: top_p = rte.get_property_float(PROPERTY_TOP_P) @@ -144,7 +153,9 @@ def on_start(self, rte: Rte) -> None: if max_tokens > 0: openai_chatgpt_config.max_tokens = int(max_tokens) except Exception as err: - logger.info(f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}") + logger.info( + f"GetProperty optional {PROPERTY_MAX_TOKENS} failed, err: {err}" + ) try: proxy_url = rte.get_property_string(PROPERTY_PROXY_URL) @@ -162,12 +173,16 @@ def on_start(self, rte: Rte) -> None: if prop_max_memory_length > 0: self.max_memory_length = int(prop_max_memory_length) except Exception as err: - logger.info(f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}") + logger.info( + f"GetProperty optional {PROPERTY_MAX_MEMORY_LENGTH} failed, err: {err}" + ) # Create openaiChatGPT instance try: self.openai_chatgpt = OpenAIChatGPT(openai_chatgpt_config) - logger.info(f"newOpenaiChatGPT succeed with max_tokens: {openai_chatgpt_config.max_tokens}, model: {openai_chatgpt_config.model}") + logger.info( + f"newOpenaiChatGPT succeed with max_tokens: {openai_chatgpt_config.max_tokens}, model: {openai_chatgpt_config.model}" + ) except Exception as err: logger.info(f"newOpenaiChatGPT failed, err: {err}") @@ -175,23 +190,27 @@ def on_start(self, rte: Rte) -> None: if greeting: try: output_data = Data.create("text_data") - output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting) - output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, greeting + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True + ) rte.send_data(output_data) logger.info(f"greeting [{greeting}] sent") except Exception as err: logger.info(f"greeting [{greeting}] send failed, err: {err}") rte.on_start_done() - def on_stop(self, rte: Rte) -> None: + def on_stop(self, rte: RteEnv) -> None: logger.info("OpenAIChatGPTExtension on_stop") rte.on_stop_done() - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("OpenAIChatGPTExtension on_deinit") rte.on_deinit_done() - def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("OpenAIChatGPTExtension on_cmd") cmd_json = cmd.to_json() logger.info("OpenAIChatGPTExtension on_cmd json: " + cmd_json) @@ -214,10 +233,7 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_result.set_property_string("detail", "success") rte.return_result(cmd_result, cmd) - def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: - logger.info("OpenAIChatGPTExtension on_cmd") - - def on_data(self, rte: Rte, data: Data) -> None: + def on_data(self, rte: RteEnv, data: Data) -> None: """ on_data receives data from rte graph. 
current supported data: @@ -234,7 +250,9 @@ def on_data(self, rte: Rte, data: Data) -> None: logger.info("ignore non-final input") return except Exception as err: - logger.info(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}") + logger.info( + f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_IS_FINAL} failed, err: {err}" + ) return # Get input text @@ -245,7 +263,9 @@ def on_data(self, rte: Rte, data: Data) -> None: return logger.info(f"OnData input text: [{input_text}]") except Exception as err: - logger.info(f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}") + logger.info( + f"OnData GetProperty {DATA_IN_TEXT_DATA_PROPERTY_TEXT} failed, err: {err}" + ) return # Prepare memory @@ -253,27 +273,35 @@ def on_data(self, rte: Rte, data: Data) -> None: self.memory.pop(0) self.memory.append({"role": "user", "content": input_text}) - def chat_completions_stream_worker(start_time, input_text, memory): try: - logger.info(f"GetChatCompletionsStream for input text: [{input_text}] memory: {memory}") + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] memory: {memory}" + ) # Get result from AI resp = self.openai_chatgpt.get_chat_completions_stream(memory) if resp is None: - logger.info(f"GetChatCompletionsStream for input text: [{input_text}] failed") + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] failed" + ) return - + sentence = "" full_content = "" first_sentence_sent = False for chat_completions in resp: if start_time < self.outdate_ts: - logger.info(f"GetChatCompletionsStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}") + logger.info( + f"GetChatCompletionsStream recv interrupt and flushing for input text: [{input_text}], startTs: {start_time}, outdateTs: {self.outdate_ts}" + ) break - - if len(chat_completions.choices) > 0 and chat_completions.choices[0].delta.content is not None: + + if ( + len(chat_completions.choices) > 0 + and chat_completions.choices[0].delta.content is not None + ): content = chat_completions.choices[0].delta.content else: content = "" @@ -281,27 +309,41 @@ def chat_completions_stream_worker(start_time, input_text, memory): full_content += content while True: - sentence, content, sentence_is_final = parse_sentence(sentence, content) + sentence, content, sentence_is_final = parse_sentence( + sentence, content + ) if len(sentence) == 0 or not sentence_is_final: logger.info(f"sentence {sentence} is empty or not final") break - logger.info(f"GetChatCompletionsStream recv for input text: [{input_text}] got sentence: [{sentence}]") + logger.info( + f"GetChatCompletionsStream recv for input text: [{input_text}] got sentence: [{sentence}]" + ) # send sentence try: output_data = Data.create("text_data") - output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) - output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False) + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, False + ) rte.send_data(output_data) - logger.info(f"GetChatCompletionsStream recv for input text: [{input_text}] sent sentence [{sentence}]") + logger.info( + f"GetChatCompletionsStream recv for input text: [{input_text}] sent sentence [{sentence}]" + ) except Exception as err: - logger.info(f"GetChatCompletionsStream recv for input text: [{input_text}] send sentence [{sentence}] 
failed, err: {err}") + logger.info( + f"GetChatCompletionsStream recv for input text: [{input_text}] send sentence [{sentence}] failed, err: {err}" + ) break sentence = "" if not first_sentence_sent: first_sentence_sent = True - logger.info(f"GetChatCompletionsStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms") + logger.info( + f"GetChatCompletionsStream recv for input text: [{input_text}] first sentence sent, first_sentence_latency {get_current_time() - start_time}ms" + ) # remember response as assistant content in memory memory.append({"role": "assistant", "content": full_content}) @@ -309,19 +351,32 @@ def chat_completions_stream_worker(start_time, input_text, memory): # send end of segment try: output_data = Data.create("text_data") - output_data.set_property_string(DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence) - output_data.set_property_bool(DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True) + output_data.set_property_string( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT, sentence + ) + output_data.set_property_bool( + DATA_OUT_TEXT_DATA_PROPERTY_TEXT_END_OF_SEGMENT, True + ) rte.send_data(output_data) - logger.info(f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent") + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] sent" + ) except Exception as err: - logger.info(f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}") - + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] end of segment with sentence [{sentence}] send failed, err: {err}" + ) + except Exception as e: - logger.info(f"GetChatCompletionsStream for input text: [{input_text}] failed, err: {e}") + logger.info( + f"GetChatCompletionsStream for input text: [{input_text}] failed, err: {e}" + ) # Start thread to request and read responses from OpenAI start_time = get_current_time() - thread = Thread(target=chat_completions_stream_worker, args=(start_time, input_text, self.memory)) + thread = Thread( + target=chat_completions_stream_worker, + args=(start_time, input_text, self.memory), + ) thread.start() logger.info(f"OpenAIChatGPTExtension on_data end") # try: @@ -336,16 +391,16 @@ def chat_completions_stream_worker(start_time, input_text, memory): @register_addon_as_extension("openai_chatgpt_python") class OpenAIChatGPTExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("OpenAIChatGPTExtensionAddon on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str, context) -> None: + def on_create_instance(self, rte: RteEnv, addon_name: str, context) -> None: logger.info("on_create_instance") rte.on_create_instance_done(OpenAIChatGPTExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("OpenAIChatGPTExtensionAddon on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/openai_chatgpt_python/requirements.txt b/agents/addon/extension/openai_chatgpt_python/requirements.txt index d4da23b7a..c8aae8b65 100644 --- a/agents/addon/extension/openai_chatgpt_python/requirements.txt +++ b/agents/addon/extension/openai_chatgpt_python/requirements.txt @@ -1,3 +1,3 @@ -pillow==10.4.0 +# pillow==10.4.0 openai==1.35.13 
requests==2.32.3 \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/manifest.json b/agents/addon/extension/qwen_llm_python/manifest.json index c226e41d0..02b7a98ee 100644 --- a/agents/addon/extension/qwen_llm_python/manifest.json +++ b/agents/addon/extension/qwen_llm_python/manifest.json @@ -7,7 +7,7 @@ { "type": "system", "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.1" } ], "api": { @@ -32,40 +32,40 @@ } }, "data_in": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - }, - "is_final": { - "type": "bool" + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "is_final": { + "type": "bool" + } } } - } ], "data_out": [ - { - "name": "text_data", - "property": { - "text": { - "type": "string" - }, - "end_of_segment": { - "type": "bool" + { + "name": "text_data", + "property": { + "text": { + "type": "string" + }, + "end_of_segment": { + "type": "bool" + } } } - } ], "cmd_in": [ - { - "name": "flush" - } + { + "name": "flush" + } ], "cmd_out": [ - { - "name": "flush" - } + { + "name": "flush" + } ] -} + } } \ No newline at end of file diff --git a/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py b/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py index ee63185f9..b635f64e2 100644 --- a/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py +++ b/agents/addon/extension/qwen_llm_python/qwen_llm_addon.py @@ -5,29 +5,29 @@ # Copyright (c) 2024 Agora IO. All rights reserved. # # -from rte_runtime_python import ( +from rte import ( Addon, register_addon_as_extension, - Rte, + RteEnv, ) from .log import logger @register_addon_as_extension("qwen_llm_python") class QWenLLMExtensionAddon(Addon): - def on_init(self, rte: Rte, manifest, property) -> None: + def on_init(self, rte: RteEnv, manifest, property) -> None: logger.info("QWenLLMExtensionAddon on_init") rte.on_init_done(manifest, property) return - def on_create_instance(self, rte: Rte, addon_name: str, context): + def on_create_instance(self, rte: RteEnv, addon_name: str, context): logger.info("on_create_instance") from .qwen_llm_extension import QWenLLMExtension rte.on_create_instance_done(QWenLLMExtension(addon_name), context) - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("QWenLLMExtensionAddon on_deinit") rte.on_deinit_done() return diff --git a/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py b/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py index 90ce144ce..1b34e20b8 100644 --- a/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py +++ b/agents/addon/extension/qwen_llm_python/qwen_llm_extension.py @@ -5,16 +5,15 @@ # Copyright (c) 2024 Agora IO. All rights reserved. 
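#
# A rough sketch of the data path in this extension, pieced together from the
# methods below (exact threading behavior is assumed): on_data enqueues work
# and returns, while a dedicated thread drains the queue:
#
#   on_data(rte, data)   ->  self.queue.put((inputText, ts))
#   async_handle(rte)    ->  value = self.queue.get()
#                            self.call_with_stream(rte, ts, inputText, messages)
#   flush()              ->  drains self.queue so stale requests are skipped
#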
# # -from rte_runtime_python import ( +from rte import ( Extension, - Rte, + RteEnv, Cmd, Data, StatusCode, CmdResult, MetadataInfo, ) -from rte_runtime_python.image_frame import ImageFrame from typing import List, Any import dashscope import queue @@ -99,7 +98,7 @@ def call(self, messages: List[Any]): logger.info("Failed to get response %s", response) def call_with_stream( - self, rte: Rte, ts: datetime.time, inputText: str, messages: List[Any] + self, rte: RteEnv, ts: datetime.time, inputText: str, messages: List[Any] ): if self.need_interrupt(ts): logger.warning("out of date, %s, %s", self.outdateTs, ts) @@ -162,11 +161,13 @@ def call_with_stream( self.on_msg("assistant", total) logger.info("on response %s", total) - def on_init(self, rte: Rte, manifest: MetadataInfo, property: MetadataInfo) -> None: + def on_init( + self, rte: RteEnv, manifest: MetadataInfo, property: MetadataInfo + ) -> None: logger.info("QWenLLMExtension on_init") rte.on_init_done(manifest, property) - def on_start(self, rte: Rte) -> None: + def on_start(self, rte: RteEnv) -> None: logger.info("QWenLLMExtension on_start") self.api_key = rte.get_property_string("api_key") self.model = rte.get_property_string("model") @@ -178,7 +179,7 @@ def on_start(self, rte: Rte) -> None: self.thread.start() rte.on_start_done() - def on_stop(self, rte: Rte) -> None: + def on_stop(self, rte: RteEnv) -> None: logger.info("QWenLLMExtension on_stop") self.stopped = True self.queue.put(None) @@ -186,7 +187,7 @@ def on_stop(self, rte: Rte) -> None: self.thread.join() rte.on_stop_done() - def on_deinit(self, rte: Rte) -> None: + def on_deinit(self, rte: RteEnv) -> None: logger.info("QWenLLMExtension on_deinit") rte.on_deinit_done() @@ -195,7 +196,7 @@ def flush(self): while not self.queue.empty(): self.queue.get() - def on_data(self, rte: Rte, data: Data) -> None: + def on_data(self, rte: RteEnv, data: Data) -> None: logger.info("QWenLLMExtension on_data") is_final = data.get_property_bool("is_final") if not is_final: @@ -212,7 +213,7 @@ def on_data(self, rte: Rte, data: Data) -> None: logger.info("on data %s, %s", inputText, ts) self.queue.put((inputText, ts)) - def async_handle(self, rte: Rte): + def async_handle(self, rte: RteEnv): while not self.stopped: try: value = self.queue.get() @@ -227,7 +228,7 @@ def async_handle(self, rte: Rte): except Exception as e: logger.exception(e) - def on_cmd(self, rte: Rte, cmd: Cmd) -> None: + def on_cmd(self, rte: RteEnv, cmd: Cmd) -> None: logger.info("QWenLLMExtension on_cmd") cmd_json = cmd.to_json() logger.info("QWenLLMExtension on_cmd json: %s", cmd_json) @@ -246,6 +247,3 @@ def on_cmd(self, rte: Rte, cmd: Cmd) -> None: cmd_result = CmdResult.create(StatusCode.OK) rte.return_result(cmd_result, cmd) - - def on_image_frame(self, rte: Rte, image_frame: ImageFrame) -> None: - logger.info("QWenLLMExtension on_cmd") From 8f12ec46058324a7e54b104753332249b5c9edc4 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 15:55:18 +0000 Subject: [PATCH 58/72] chore: add clean --- Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile b/Makefile index a27187110..beb9dfcbd 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,13 @@ build-server: cd server && go mod tidy && go mod download && go build -o bin/api main.go @echo ">> done" +clean: clean-agents + +clean-agents: + @echo ">> clean agents" + rm -rf agents/manifest.json agents/out agents/interface agents/include agents/lib agents/lib64 agents/addon/system agents/addon/extension_group agents/.release + @echo ">> done" + 
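# A usage sketch for the clean targets above (the output lines mirror the
# echoes in clean-agents; anything beyond that is assumed):
#
#   $ make clean
#   >> clean agents
#   >> done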
docker-build-server: @echo ">> docker build server" docker build -t $(REGISTRY)$(PROJECT_NAME):$(PROJECT_VERSION) --platform linux/amd64 -f Dockerfile . From dc9743ce60331f2b3852eb92529da6e9fa01ea39 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 15:58:17 +0000 Subject: [PATCH 59/72] feat: remove python start --- agents/bin/start | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100755 agents/bin/start diff --git a/agents/bin/start b/agents/bin/start deleted file mode 100755 index 68bb9e4e1..000000000 --- a/agents/bin/start +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -set -e - -cd "$(dirname "${BASH_SOURCE[0]}")/.." - -# python3 -m venv . -# cd bin -# source activate -# cd .. - -if [[ -f "requirements.txt" ]]; then - pip install -r requirements.txt -fi - -# traverse the addon/extension directory to find the requirements.txt -if [[ -d "addon/extension" ]]; then - for extension in addon/extension/*; do - if [[ -f "$extension/requirements.txt" ]]; then - pip install -r $extension/requirements.txt - fi - done -fi - -# if [[ -f "lib/libclang_rt.asan_osx_dynamic.dylib" ]]; then -# export DYLD_INSERT_LIBRARIES=lib/libclang_rt.asan_osx_dynamic.dylib -# fi - -# if [[ -f "lib/libasan.so" ]]; then -# export LD_PRELOAD=lib/libasan.so -# fi - -export PYTHONPATH=lib:interface -export RTE_HOME=. -# export PYTHONMALLOC=malloc - -python3 main.py "$@" From 9ff562f2576db603c2946134ff69bb20563fb299 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 16:02:14 +0000 Subject: [PATCH 60/72] chore: also clean bin folder --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index beb9dfcbd..aea91c82c 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ clean: clean-agents clean-agents: @echo ">> clean agents" - rm -rf agents/manifest.json agents/out agents/interface agents/include agents/lib agents/lib64 agents/addon/system agents/addon/extension_group agents/.release + rm -rf agents/manifest.json agents/bin agents/out agents/interface agents/include agents/lib agents/lib64 agents/addon/system agents/addon/extension_group agents/.release @echo ">> done" docker-build-server: From 9a788fbb8c587217b5e7ac4e24091a414954e68f Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 16:02:58 +0000 Subject: [PATCH 61/72] feat: upgrade dependencies --- agents/manifest.json.cn.bedrock.example | 26 ++++++++++++++------- agents/manifest.json.cn.example | 26 ++++++++++++++------- agents/manifest.json.cn.openai.example | 26 ++++++++++++++------- agents/manifest.json.elevenlabs.example | 31 +++++++++++-------------- agents/manifest.json.en.bedrock.example | 26 ++++++++++++++------- agents/manifest.json.example | 26 ++++++++++++++------- agents/manifest.json.qwen.example | 26 ++++++++++++++------- 7 files changed, 121 insertions(+), 66 deletions(-) diff --git a/agents/manifest.json.cn.bedrock.example b/agents/manifest.json.cn.bedrock.example index 7963b536b..0a8ae550c 100644 --- a/agents/manifest.json.cn.bedrock.example +++ b/agents/manifest.json.cn.bedrock.example @@ -2,22 +2,32 @@ "type": "app", "name": "astra_agents", "version": "0.2.0", - "language": "python", + "language": "go", "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, { "type": "extension_group", "name": "default_extension_group", - 
"version": "0.2.0" + "version": "0.3.1" }, { "type": "extension", "name": "agora_rtc", - "version": "0.2.1" - }, - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.0-rc1" } ], "predefined_graphs": [ diff --git a/agents/manifest.json.cn.example b/agents/manifest.json.cn.example index 0040a8142..2c42d08f9 100644 --- a/agents/manifest.json.cn.example +++ b/agents/manifest.json.cn.example @@ -2,22 +2,32 @@ "type": "app", "name": "astra_agents", "version": "0.2.0", - "language": "python", + "language": "go", "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, { "type": "extension_group", "name": "default_extension_group", - "version": "0.2.0" + "version": "0.3.1" }, { "type": "extension", "name": "agora_rtc", - "version": "0.2.0-alpha" - }, - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.0-rc1" } ], "predefined_graphs": [ diff --git a/agents/manifest.json.cn.openai.example b/agents/manifest.json.cn.openai.example index 8a8f6b1e6..3f0625d6a 100644 --- a/agents/manifest.json.cn.openai.example +++ b/agents/manifest.json.cn.openai.example @@ -2,22 +2,32 @@ "type": "app", "name": "astra_agents", "version": "0.2.0", - "language": "python", + "language": "go", "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, { "type": "extension_group", "name": "default_extension_group", - "version": "0.2.0" + "version": "0.3.1" }, { "type": "extension", "name": "agora_rtc", - "version": "0.2.1" - }, - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.0-rc1" } ], "predefined_graphs": [ diff --git a/agents/manifest.json.elevenlabs.example b/agents/manifest.json.elevenlabs.example index 0f2ee255b..e4e45ae6c 100644 --- a/agents/manifest.json.elevenlabs.example +++ b/agents/manifest.json.elevenlabs.example @@ -5,34 +5,29 @@ "language": "go", "dependencies": [ { - "type": "extension_group", - "name": "default_extension_group", - "version": "0.1.0" + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" }, { - "type": "extension", - "name": "agora_rtc", - "version": "0.1.1" + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" }, { "type": "extension", - "name": "chat_transcriber", - "version": "0.1.0" + "name": "py_init_extension_cpp", + "version": "0.3.1" }, { - "type": "extension", - "name": "elevenlabs_tts", - "version": "0.1.0" - }, - { - "type": "extension", - "name": "interrupt_detector", - "version": "0.1.0" + "type": "extension_group", + "name": "default_extension_group", + "version": "0.3.1" }, { "type": "extension", - "name": "openai_chatgpt", - "version": "0.1.0" + "name": "agora_rtc", + "version": "0.3.0-rc1" } ], "predefined_graphs": [ diff --git a/agents/manifest.json.en.bedrock.example b/agents/manifest.json.en.bedrock.example index 7963b536b..0a8ae550c 100644 --- a/agents/manifest.json.en.bedrock.example +++ b/agents/manifest.json.en.bedrock.example @@ -2,22 +2,32 @@ "type": "app", "name": "astra_agents", "version": "0.2.0", - "language": "python", + "language": "go", "dependencies": [ + { + "type": "system", + "name": 
"rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, { "type": "extension_group", "name": "default_extension_group", - "version": "0.2.0" + "version": "0.3.1" }, { "type": "extension", "name": "agora_rtc", - "version": "0.2.1" - }, - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.0-rc1" } ], "predefined_graphs": [ diff --git a/agents/manifest.json.example b/agents/manifest.json.example index 8494b15c1..496803f0e 100644 --- a/agents/manifest.json.example +++ b/agents/manifest.json.example @@ -2,22 +2,32 @@ "type": "app", "name": "astra_agents", "version": "0.2.0", - "language": "python", + "language": "go", "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, { "type": "extension_group", "name": "default_extension_group", - "version": "0.2.0" + "version": "0.3.1" }, { "type": "extension", "name": "agora_rtc", - "version": "0.2.1" - }, - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.0-rc1" } ], "predefined_graphs": [ diff --git a/agents/manifest.json.qwen.example b/agents/manifest.json.qwen.example index 989d70160..f7481e23e 100644 --- a/agents/manifest.json.qwen.example +++ b/agents/manifest.json.qwen.example @@ -2,22 +2,32 @@ "type": "app", "name": "astra_agents", "version": "0.2.0", - "language": "python", + "language": "go", "dependencies": [ + { + "type": "system", + "name": "rte_runtime_python", + "version": "0.3.1" + }, + { + "type": "system", + "name": "rte_runtime_go", + "version": "0.3.1" + }, + { + "type": "extension", + "name": "py_init_extension_cpp", + "version": "0.3.1" + }, { "type": "extension_group", "name": "default_extension_group", - "version": "0.2.0" + "version": "0.3.1" }, { "type": "extension", "name": "agora_rtc", - "version": "=0.3.0-databuf" - }, - { - "type": "system", - "name": "rte_runtime_python", - "version": "0.2.0" + "version": "0.3.0-rc1" } ], "predefined_graphs": [ From 9ebd7befb9f9dd5abe9d1ffb2063ac2f5a7502dd Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2024 16:03:21 +0000 Subject: [PATCH 62/72] chore: add vscode settings --- .vscode/settings.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..b0181771b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "python.analysis.extraPaths": [ + "./agents/interface", + ], + "editor.formatOnSave": true, + } \ No newline at end of file From 290521d5ca08dce23df70fae6d4bc2dca82287c3 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Thu, 25 Jul 2024 14:45:05 +0000 Subject: [PATCH 63/72] chore: remove useless deps --- agents/addon/extension/bedrock_llm_python/requirements.txt | 3 --- agents/addon/extension/openai_chatgpt_python/requirements.txt | 1 - 2 files changed, 4 deletions(-) diff --git a/agents/addon/extension/bedrock_llm_python/requirements.txt b/agents/addon/extension/bedrock_llm_python/requirements.txt index 3f1da6f45..0a92c2bea 100644 --- a/agents/addon/extension/bedrock_llm_python/requirements.txt +++ b/agents/addon/extension/bedrock_llm_python/requirements.txt @@ -1,4 +1 @@ -# pillow==10.4.0 -# 
From 290521d5ca08dce23df70fae6d4bc2dca82287c3 Mon Sep 17 00:00:00 2001
From: Jay Zhang
Date: Thu, 25 Jul 2024 14:45:05 +0000
Subject: [PATCH 63/72] chore: remove useless deps

---
 agents/addon/extension/bedrock_llm_python/requirements.txt    | 3 ---
 agents/addon/extension/openai_chatgpt_python/requirements.txt | 1 -
 2 files changed, 4 deletions(-)

diff --git a/agents/addon/extension/bedrock_llm_python/requirements.txt b/agents/addon/extension/bedrock_llm_python/requirements.txt
index 3f1da6f45..0a92c2bea 100644
--- a/agents/addon/extension/bedrock_llm_python/requirements.txt
+++ b/agents/addon/extension/bedrock_llm_python/requirements.txt
@@ -1,4 +1 @@
-# pillow==10.4.0
-# openai==1.35.13
-# requests==2.32.3
 boto3==1.34.143
\ No newline at end of file
diff --git a/agents/addon/extension/openai_chatgpt_python/requirements.txt b/agents/addon/extension/openai_chatgpt_python/requirements.txt
index c8aae8b65..de1e7f468 100644
--- a/agents/addon/extension/openai_chatgpt_python/requirements.txt
+++ b/agents/addon/extension/openai_chatgpt_python/requirements.txt
@@ -1,3 +1,2 @@
-# pillow==10.4.0
 openai==1.35.13
 requests==2.32.3
\ No newline at end of file

From 89fb8d4fd83804fe601620610b61b90ffb659a6e Mon Sep 17 00:00:00 2001
From: Jay Zhang
Date: Thu, 25 Jul 2024 14:45:19 +0000
Subject: [PATCH 64/72] chore: typo

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index aea91c82c..7e3e4514e 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ build: build-agents build-server

 build-agents:
 	@echo ">> build agents"
-	cd agents && ./scripts/install_deps_and_build.sh linux x64 && mv ./bin/main ./bin/worker
+	cd agents && ./scripts/install_deps_and_build.sh linux x64 && mv bin/main bin/worker
 	@echo ">> done"

 build-server:

From a359ec560af60106fe3805ea200db8bb5ebd76d0 Mon Sep 17 00:00:00 2001
From: Jay Zhang
Date: Thu, 25 Jul 2024 14:45:30 +0000
Subject: [PATCH 65/72] chore: log level

---
 agents/property.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/agents/property.json b/agents/property.json
index 25c3d21a6..64810e6ab 100644
--- a/agents/property.json
+++ b/agents/property.json
@@ -1,5 +1,5 @@
 {
   "rte": {
-    "log_level": 1
+    "log_level": 3
   }
-}
+}
\ No newline at end of file

From f57907bf20b7cdc619cd4e25daa17b460250facd Mon Sep 17 00:00:00 2001
From: Jay Zhang
Date: Thu, 25 Jul 2024 14:45:44 +0000
Subject: [PATCH 66/72] feat: start by worker

---
 server/internal/worker.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/internal/worker.go b/server/internal/worker.go
index 14a35c8c6..e3daf80a8 100644
--- a/server/internal/worker.go
+++ b/server/internal/worker.go
@@ -24,7 +24,7 @@ type Worker struct {

 const (
 	workerCleanSleepSeconds = 5
-	workerExec              = "/app/agents/bin/start"
+	workerExec              = "/app/agents/bin/worker"
 )

 var (
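Note: the Makefile change in patch 64 and the workerExec change in patch 66 are two halves of one rename — the build now produces bin/worker, and the server spawns it from /app/agents/bin/worker. A minimal sanity check in Python, assuming the container layout from these diffs (the script itself is illustrative, not part of the repo):

    import os
    import sys

    # Path taken from server/internal/worker.go after patch 66; the binary
    # is produced by the Makefile's `mv bin/main bin/worker` in patch 64.
    WORKER_EXEC = "/app/agents/bin/worker"

    if not (os.path.isfile(WORKER_EXEC) and os.access(WORKER_EXEC, os.X_OK)):
        sys.exit(f"worker binary missing or not executable: {WORKER_EXEC}")
    print(f"ok: {WORKER_EXEC}")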
f"on_data get_property_int {TEXT_DATA_STREAM_ID_FIELD} error: {e}" ) return @@ -96,7 +96,7 @@ def on_data(self, rte: RteEnv, data: Data) -> None: try: end_of_segment = data.get_property_bool(TEXT_DATA_END_OF_SEGMENT_FIELD) except Exception as e: - logger.warning( + logger.exception( f"on_data get_property_bool {TEXT_DATA_END_OF_SEGMENT_FIELD} error: {e}" ) return @@ -131,14 +131,7 @@ def on_data(self, rte: RteEnv, data: Data) -> None: ) try: - text = json.dumps( - { - "uid": stream_id, - "text": text, - "is_final": end_of_segment, - } - ) - text_buf = text.encode("utf-8") + pb_serialized_text = pb_text.SerializeToString() except Exception as e: logger.warning(f"on_data SerializeToString error: {e}") return @@ -146,11 +139,7 @@ def on_data(self, rte: RteEnv, data: Data) -> None: try: # convert the origin text data to the protobuf data and send it to the graph. rte_data = Data.create("data") - # rte_data.set_property_string("data", pb_serialized_text) - rte_data.alloc_buf(len(text_buf)) - buf = rte_data.lock_buf() - buf[:] = text_buf[:] - rte_data.unlock_buf(buf) + rte_data.set_property_buf("data", pb_serialized_text) rte.send_data(rte_data) logger.info("data sent") except Exception as e: From 8755d5460a4616f36f6fc8ac56d0f29203d5ef3d Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Thu, 25 Jul 2024 15:31:37 +0000 Subject: [PATCH 68/72] fix: package --- Dockerfile | 4 +++- agents/scripts/package.sh | 9 +++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index b7518c99c..605bce580 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/rte-design/astra_agents_build:0.3.2 AS builder +FROM ghcr.io/rte-design/astra_agents_build:0.3.3 AS builder ARG SESSION_CONTROL_CONF=session_control.conf @@ -34,6 +34,8 @@ WORKDIR /app COPY --from=builder /app/agents/.release/ agents/ COPY --from=builder /app/server/bin/api /app/server/bin/api +COPY --from=builder /usr/local/lib /usr/local/lib +COPY --from=builder /usr/lib/python3 /usr/lib/python3 EXPOSE 8080 diff --git a/agents/scripts/package.sh b/agents/scripts/package.sh index f41f198f7..4c3ba1d04 100755 --- a/agents/scripts/package.sh +++ b/agents/scripts/package.sh @@ -47,13 +47,10 @@ cp manifest.cn.json .release cp manifest.en.json .release cp property.json .release -# python main and deps -if [[ -f main.py ]]; then - cp main.py .release -fi -if [[ -d interface/rte_runtime_python ]]; then +# python deps +if [[ -d interface/rte ]]; then mkdir -p .release/interface - cp -r interface/rte_runtime_python .release/interface + cp -r interface/rte .release/interface fi # extension group From 632ccdcadef96e5987a59fa78f1d56f4c53adcc1 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Thu, 25 Jul 2024 15:36:57 +0000 Subject: [PATCH 69/72] fix: default examples --- agents/manifest.json.cn.example | 56 +++++++++++++++++++++++++- agents/manifest.json.en.example | 71 ++++++++++++++++++++++++++------- agents/manifest.json.example | 71 ++++++++++++++++++++++++++------- 3 files changed, 169 insertions(+), 29 deletions(-) diff --git a/agents/manifest.json.cn.example b/agents/manifest.json.cn.example index 2c42d08f9..6aed9da4c 100644 --- a/agents/manifest.json.cn.example +++ b/agents/manifest.json.cn.example @@ -82,6 +82,12 @@ "sample_rate": 16000 } }, + { + "type": "extension", + "extension_group": "chat_transcriber", + "addon": "chat_transcriber_python", + "name": "chat_transcriber" + }, { "type": "extension", "extension_group": "default", @@ -102,6 +108,11 @@ "type": "extension_group", "addon": 
"default_extension_group", "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "chat_transcriber" } ], "connections": [ @@ -115,6 +126,10 @@ { "extension_group": "default", "extension": "interrupt_detector" + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber" } ] } @@ -167,7 +182,31 @@ { "extension_group": "tts", "extension": "cosy_tts" - } + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] + } ] } ], @@ -208,6 +247,21 @@ ] } ] + }, + { + "extension_group": "chat_transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest":[ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] } ] } diff --git a/agents/manifest.json.en.example b/agents/manifest.json.en.example index 496803f0e..4ffa02dfb 100644 --- a/agents/manifest.json.en.example +++ b/agents/manifest.json.en.example @@ -57,10 +57,16 @@ "agora_asr_session_control_file_path": "session_control.conf" } }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector", + "name": "interrupt_detector" + }, { "type": "extension", "extension_group": "chatgpt", - "addon": "openai_chatgpt_python", + "addon": "openai_chatgpt", "name": "openai_chatgpt", "property": { "base_url": "", @@ -87,9 +93,9 @@ }, { "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector_python", - "name": "interrupt_detector" + "extension_group": "transcriber", + "addon": "chat_transcriber", + "name": "chat_transcriber" }, { "type": "extension_group", @@ -105,6 +111,11 @@ "type": "extension_group", "addon": "default_extension_group", "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "transcriber" } ], "connections": [ @@ -122,17 +133,10 @@ { "extension_group": "chatgpt", "extension": "openai_chatgpt" - } - ] - } - ], - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ + }, { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" + "extension_group": "transcriber", + "extension": "chat_transcriber" } ] } @@ -148,6 +152,30 @@ { "extension_group": "tts", "extension": "azure_tts" + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] } ] } @@ -190,6 +218,21 @@ } ] }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, { "extension_group": "default", "extension": "interrupt_detector", diff --git a/agents/manifest.json.example b/agents/manifest.json.example index 496803f0e..4ffa02dfb 100644 --- a/agents/manifest.json.example +++ b/agents/manifest.json.example @@ -57,10 +57,16 @@ "agora_asr_session_control_file_path": "session_control.conf" } }, + { + "type": "extension", + "extension_group": "default", + "addon": "interrupt_detector", + "name": "interrupt_detector" + }, { 
"type": "extension", "extension_group": "chatgpt", - "addon": "openai_chatgpt_python", + "addon": "openai_chatgpt", "name": "openai_chatgpt", "property": { "base_url": "", @@ -87,9 +93,9 @@ }, { "type": "extension", - "extension_group": "default", - "addon": "interrupt_detector_python", - "name": "interrupt_detector" + "extension_group": "transcriber", + "addon": "chat_transcriber", + "name": "chat_transcriber" }, { "type": "extension_group", @@ -105,6 +111,11 @@ "type": "extension_group", "addon": "default_extension_group", "name": "tts" + }, + { + "type": "extension_group", + "addon": "default_extension_group", + "name": "transcriber" } ], "connections": [ @@ -122,17 +133,10 @@ { "extension_group": "chatgpt", "extension": "openai_chatgpt" - } - ] - } - ], - "pcm_frame": [ - { - "name": "pcm_frame", - "dest": [ + }, { - "extension_group": "chatgpt", - "extension": "openai_chatgpt" + "extension_group": "transcriber", + "extension": "chat_transcriber" } ] } @@ -148,6 +152,30 @@ { "extension_group": "tts", "extension": "azure_tts" + }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "cmd_conversions": [ + { + "cmd": { + "type": "per_property", + "keep_original": true, + "rules": [ + { + "path": "is_final", + "type": "fixed_value", + "value": "bool(true)" + }, + { + "path": "stream_id", + "type": "fixed_value", + "value": "uint32(999)" + } + ] + } + } + ] } ] } @@ -190,6 +218,21 @@ } ] }, + { + "extension_group": "transcriber", + "extension": "chat_transcriber", + "data": [ + { + "name": "data", + "dest": [ + { + "extension_group": "default", + "extension": "agora_rtc" + } + ] + } + ] + }, { "extension_group": "default", "extension": "interrupt_detector", From 05c6d12dd1cc5ee16d45fb84e44679cab4a2eae1 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Fri, 26 Jul 2024 00:58:13 +0000 Subject: [PATCH 70/72] feat: only main updates latest image --- .github/workflows/build-docker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index dffb68741..0642022c7 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -36,6 +36,6 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} registry: ghcr.io - tags: "latest,${{ steps.pre-step.outputs.image-tag }}" + tags: "${{ github.ref == 'refs/heads/main' && 'latest,' || '' }}${{ steps.pre-step.outputs.image-tag }}" no_push: ${{ github.event_name == 'pull_request' }} From d0e7187e47d2b857799fdc79deccd4a0667c09a8 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Fri, 26 Jul 2024 00:58:40 +0000 Subject: [PATCH 71/72] test: build image --- .github/workflows/build-docker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index 0642022c7..d4d2710fd 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -2,7 +2,7 @@ name: Build Docker on: push: - branches: [ "main" ] + branches: [ "main", "python-experimental-develop" ] # Publish semver tags as releases. 
From d0e7187e47d2b857799fdc79deccd4a0667c09a8 Mon Sep 17 00:00:00 2001
From: Jay Zhang
Date: Fri, 26 Jul 2024 00:58:40 +0000
Subject: [PATCH 71/72] test: build image

---
 .github/workflows/build-docker.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml
index 0642022c7..d4d2710fd 100644
--- a/.github/workflows/build-docker.yaml
+++ b/.github/workflows/build-docker.yaml
@@ -2,7 +2,7 @@ name: Build Docker

 on:
   push:
-    branches: [ "main" ]
+    branches: [ "main", "python-experimental-develop" ]
     # Publish semver tags as releases.
     tags: [ 'v*.*.*' ]
     paths-ignore:

From bcf072caf24852cf7db08d6386802ce30abae121 Mon Sep 17 00:00:00 2001
From: Jay Zhang
Date: Fri, 26 Jul 2024 01:14:14 +0000
Subject: [PATCH 72/72] feat: workaround to avoid too many warning logs

---
 agents/manifest.json.cn.example |  4 ++--
 agents/manifest.json.en.example | 11 +++++++++++
 agents/manifest.json.example    | 11 +++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/agents/manifest.json.cn.example b/agents/manifest.json.cn.example
index 6aed9da4c..4127fc028 100644
--- a/agents/manifest.json.cn.example
+++ b/agents/manifest.json.cn.example
@@ -139,8 +139,8 @@
         "name": "pcm_frame",
         "dest": [
           {
-            "extension_group": "llm",
-            "extension": "qwen_llm"
+            "extension_group": "default",
+            "extension": "interrupt_detector"
           }
         ]
       }
diff --git a/agents/manifest.json.en.example b/agents/manifest.json.en.example
index 4ffa02dfb..8bbd42ba0 100644
--- a/agents/manifest.json.en.example
+++ b/agents/manifest.json.en.example
@@ -140,6 +140,17 @@
             }
           ]
         }
+      ],
+      "pcm_frame": [
+        {
+          "name": "pcm_frame",
+          "dest": [
+            {
+              "extension_group": "default",
+              "extension": "interrupt_detector"
+            }
+          ]
+        }
       ]
     },
     {
diff --git a/agents/manifest.json.example b/agents/manifest.json.example
index 4ffa02dfb..8bbd42ba0 100644
--- a/agents/manifest.json.example
+++ b/agents/manifest.json.example
@@ -140,6 +140,17 @@
             }
           ]
         }
+      ],
+      "pcm_frame": [
+        {
+          "name": "pcm_frame",
+          "dest": [
+            {
+              "extension_group": "default",
+              "extension": "interrupt_detector"
+            }
+          ]
+        }
       ]
     },
     {