Skip to content

Commit de256af

Browse files
committed
support audio content type
1 parent 33c98f1 commit de256af

File tree

5 files changed

+75
-7
lines changed

5 files changed

+75
-7
lines changed

client/inprocess_test.go

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ func TestInProcessMCPClient(t *testing.T) {
3636
Type: "text",
3737
Text: "Input parameter: " + request.Params.Arguments["parameter-1"].(string),
3838
},
39+
mcp.AudioContent{
40+
Type: "audio",
41+
Data: "base64-encoded-audio-data",
42+
MIMEType: "audio/wav",
43+
},
3944
},
4045
}, nil
4146
})
@@ -77,6 +82,14 @@ func TestInProcessMCPClient(t *testing.T) {
7782
Text: "Test prompt with arg1: " + request.Params.Arguments["arg1"],
7883
},
7984
},
85+
{
86+
Role: mcp.RoleUser,
87+
Content: mcp.AudioContent{
88+
Type: "audio",
89+
Data: "base64-encoded-audio-data",
90+
MIMEType: "audio/wav",
91+
},
92+
},
8093
},
8194
}, nil
8295
},
@@ -192,8 +205,8 @@ func TestInProcessMCPClient(t *testing.T) {
192205
t.Fatalf("CallTool failed: %v", err)
193206
}
194207

195-
if len(result.Content) != 1 {
196-
t.Errorf("Expected 1 content item, got %d", len(result.Content))
208+
if len(result.Content) != 2 {
209+
t.Errorf("Expected 2 content item, got %d", len(result.Content))
197210
}
198211
})
199212

@@ -359,14 +372,17 @@ func TestInProcessMCPClient(t *testing.T) {
359372

360373
request := mcp.GetPromptRequest{}
361374
request.Params.Name = "test-prompt"
375+
request.Params.Arguments = map[string]string{
376+
"arg1": "arg1 value",
377+
}
362378

363379
result, err := client.GetPrompt(context.Background(), request)
364380
if err != nil {
365381
t.Errorf("GetPrompt failed: %v", err)
366382
}
367383

368-
if len(result.Messages) != 1 {
369-
t.Errorf("Expected 1 message, got %d", len(result.Messages))
384+
if len(result.Messages) != 2 {
385+
t.Errorf("Expected 2 message, got %d", len(result.Messages))
370386
}
371387
})
372388

mcp/prompts.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ const (
7878
// resources from the MCP server.
7979
type PromptMessage struct {
8080
Role Role `json:"role"`
81-
Content Content `json:"content"` // Can be TextContent, ImageContent, or EmbeddedResource
81+
Content Content `json:"content"` // Can be TextContent, ImageContent, AudioContent or EmbeddedResource
8282
}
8383

8484
// PromptListChangedNotification is an optional notification from the server

mcp/tools.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ type ListToolsResult struct {
3333
// should be reported as an MCP error response.
3434
type CallToolResult struct {
3535
Result
36-
Content []Content `json:"content"` // Can be TextContent, ImageContent, or EmbeddedResource
36+
Content []Content `json:"content"` // Can be TextContent, ImageContent, AudioContent, or EmbeddedResource
3737
// Whether the tool call ended in an error.
3838
//
3939
// If not set, this is assumed to be false (the call was successful).

mcp/types.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ type CreateMessageResult struct {
656656
// SamplingMessage describes a message issued to or received from an LLM API.
657657
type SamplingMessage struct {
658658
Role Role `json:"role"`
659-
Content interface{} `json:"content"` // Can be TextContent or ImageContent
659+
Content interface{} `json:"content"` // Can be TextContent, ImageContent or AudioContent
660660
}
661661

662662
type Annotations struct {
@@ -709,6 +709,19 @@ type ImageContent struct {
709709

710710
func (ImageContent) isContent() {}
711711

712+
// AudioContent represents the contents of audio, embedded into a prompt or tool call result.
713+
// It must have Type set to "audio".
714+
type AudioContent struct {
715+
Annotated
716+
Type string `json:"type"` // Must be "audio"
717+
// The base64-encoded audio data.
718+
Data string `json:"data"`
719+
// The MIME type of the audio. Different providers may support different audio types.
720+
MIMEType string `json:"mimeType"`
721+
}
722+
723+
func (AudioContent) isContent() {}
724+
712725
// EmbeddedResource represents the contents of a resource, embedded into a prompt or tool call result.
713726
//
714727
// It is up to the client how best to render embedded resources for the

mcp/utils.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ func AsImageContent(content interface{}) (*ImageContent, bool) {
7777
return asType[ImageContent](content)
7878
}
7979

80+
// AsImageContent attempts to cast the given interface to AudioContent
81+
func AsAudioContent(content interface{}) (*AudioContent, bool) {
82+
return asType[AudioContent](content)
83+
}
84+
8085
// AsEmbeddedResource attempts to cast the given interface to EmbeddedResource
8186
func AsEmbeddedResource(content interface{}) (*EmbeddedResource, bool) {
8287
return asType[EmbeddedResource](content)
@@ -202,6 +207,15 @@ func NewImageContent(data, mimeType string) ImageContent {
202207
}
203208
}
204209

210+
// Helper function to create a new AudioContent
211+
func NewAudioContent(data, mimeType string) AudioContent {
212+
return AudioContent{
213+
Type: "audio",
214+
Data: data,
215+
MIMEType: mimeType,
216+
}
217+
}
218+
205219
// Helper function to create a new EmbeddedResource
206220
func NewEmbeddedResource(resource ResourceContents) EmbeddedResource {
207221
return EmbeddedResource{
@@ -239,6 +253,23 @@ func NewToolResultImage(text, imageData, mimeType string) *CallToolResult {
239253
}
240254
}
241255

256+
// NewToolResultAudio creates a new CallToolResult with both text and audio content
257+
func NewToolResultAudio(text, imageData, mimeType string) *CallToolResult {
258+
return &CallToolResult{
259+
Content: []Content{
260+
TextContent{
261+
Type: "text",
262+
Text: text,
263+
},
264+
AudioContent{
265+
Type: "audio",
266+
Data: imageData,
267+
MIMEType: mimeType,
268+
},
269+
},
270+
}
271+
}
272+
242273
// NewToolResultResource creates a new CallToolResult with an embedded resource
243274
func NewToolResultResource(
244275
text string,
@@ -415,6 +446,14 @@ func ParseContent(contentMap map[string]any) (Content, error) {
415446
}
416447
return NewImageContent(data, mimeType), nil
417448

449+
case "audio":
450+
data := ExtractString(contentMap, "data")
451+
mimeType := ExtractString(contentMap, "mimeType")
452+
if data == "" || mimeType == "" {
453+
return nil, fmt.Errorf("audio data or mimeType is missing")
454+
}
455+
return NewAudioContent(data, mimeType), nil
456+
418457
case "resource":
419458
resourceMap := ExtractMap(contentMap, "resource")
420459
if resourceMap == nil {

0 commit comments

Comments
 (0)