Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions client/inprocess_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ func TestInProcessMCPClient(t *testing.T) {
Type: "text",
Text: "Input parameter: " + request.Params.Arguments["parameter-1"].(string),
},
mcp.AudioContent{
Type: "audio",
Data: "base64-encoded-audio-data",
MIMEType: "audio/wav",
},
},
}, nil
})
Expand Down Expand Up @@ -77,6 +82,14 @@ func TestInProcessMCPClient(t *testing.T) {
Text: "Test prompt with arg1: " + request.Params.Arguments["arg1"],
},
},
{
Role: mcp.RoleUser,
Content: mcp.AudioContent{
Type: "audio",
Data: "base64-encoded-audio-data",
MIMEType: "audio/wav",
},
},
},
}, nil
},
Expand Down Expand Up @@ -192,7 +205,7 @@ func TestInProcessMCPClient(t *testing.T) {
t.Fatalf("CallTool failed: %v", err)
}

if len(result.Content) != 1 {
if len(result.Content) != 2 {
	// The tool handler in this test returns one text item and one audio item,
	// so the failure message must report the same expectation as the check.
	t.Errorf("Expected 2 content items, got %d", len(result.Content))
}
})
Expand Down Expand Up @@ -359,14 +372,17 @@ func TestInProcessMCPClient(t *testing.T) {

request := mcp.GetPromptRequest{}
request.Params.Name = "test-prompt"
request.Params.Arguments = map[string]string{
"arg1": "arg1 value",
}

result, err := client.GetPrompt(context.Background(), request)
if err != nil {
	// Stop this subtest immediately: after a failed call result cannot be
	// trusted (it may be nil), and the check that follows reads
	// result.Messages.
	t.Fatalf("GetPrompt failed: %v", err)
}

if len(result.Messages) != 1 {
t.Errorf("Expected 1 message, got %d", len(result.Messages))
if len(result.Messages) != 2 {
	// The prompt handler in this test returns a text message followed by an
	// audio message.
	t.Errorf("Expected 2 messages, got %d", len(result.Messages))
}
})

Expand Down
2 changes: 1 addition & 1 deletion mcp/prompts.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ const (
// resources from the MCP server.
type PromptMessage struct {
Role Role `json:"role"`
Content Content `json:"content"` // Can be TextContent, ImageContent, or EmbeddedResource
Content Content `json:"content"` // Can be TextContent, ImageContent, AudioContent, or EmbeddedResource
}

// PromptListChangedNotification is an optional notification from the server
Expand Down
2 changes: 1 addition & 1 deletion mcp/tools.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type ListToolsResult struct {
// should be reported as an MCP error response.
type CallToolResult struct {
Result
Content []Content `json:"content"` // Can be TextContent, ImageContent, or EmbeddedResource
Content []Content `json:"content"` // Can be TextContent, ImageContent, AudioContent, or EmbeddedResource
// Whether the tool call ended in an error.
//
// If not set, this is assumed to be false (the call was successful).
Expand Down
15 changes: 14 additions & 1 deletion mcp/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ type CreateMessageResult struct {
// SamplingMessage describes a message issued to or received from an LLM API.
type SamplingMessage struct {
Role Role `json:"role"`
Content interface{} `json:"content"` // Can be TextContent or ImageContent
Content interface{} `json:"content"` // Can be TextContent, ImageContent, or AudioContent
}

type Annotations struct {
Expand Down Expand Up @@ -709,6 +709,19 @@ type ImageContent struct {

func (ImageContent) isContent() {}

// AudioContent represents the contents of audio, embedded into a prompt or tool call result.
// It must have Type set to "audio".
//
// Data carries the audio payload as a base64-encoded string, and MIMEType
// names the audio format of that payload.
type AudioContent struct {
Annotated
Type string `json:"type"` // Must be "audio"
// The base64-encoded audio data.
Data string `json:"data"`
// The MIME type of the audio. Different providers may support different audio types.
MIMEType string `json:"mimeType"`
}

// isContent is an empty marker method. AudioContent values are placed in
// []Content slices elsewhere in this change, so this presumably is what lets
// AudioContent satisfy the Content interface — confirm against its definition.
func (AudioContent) isContent() {}

// EmbeddedResource represents the contents of a resource, embedded into a prompt or tool call result.
//
// It is up to the client how best to render embedded resources for the
Expand Down
39 changes: 39 additions & 0 deletions mcp/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ func AsImageContent(content interface{}) (*ImageContent, bool) {
return asType[ImageContent](content)
}

// AsAudioContent attempts to cast the given interface to AudioContent.
//
// It delegates to asType[AudioContent] and returns its results unchanged: a
// *AudioContent and a boolean reporting whether the cast succeeded.
func AsAudioContent(content interface{}) (*AudioContent, bool) {
	return asType[AudioContent](content)
}

// AsEmbeddedResource attempts to cast the given interface to EmbeddedResource
func AsEmbeddedResource(content interface{}) (*EmbeddedResource, bool) {
return asType[EmbeddedResource](content)
Expand Down Expand Up @@ -202,6 +207,15 @@ func NewImageContent(data, mimeType string) ImageContent {
}
}

// NewAudioContent returns an AudioContent whose Type is "audio" and whose
// Data and MIMEType are the given data and mimeType. Every other field keeps
// its zero value.
func NewAudioContent(data, mimeType string) AudioContent {
	var audio AudioContent
	audio.Type = "audio"
	audio.Data = data
	audio.MIMEType = mimeType
	return audio
}

// Helper function to create a new EmbeddedResource
func NewEmbeddedResource(resource ResourceContents) EmbeddedResource {
return EmbeddedResource{
Expand Down Expand Up @@ -239,6 +253,23 @@ func NewToolResultImage(text, imageData, mimeType string) *CallToolResult {
}
}

// NewToolResultAudio creates a new CallToolResult with both text and audio content.
//
// text becomes the first content item (a TextContent with Type "text").
// audioData and mimeType populate the second item (an AudioContent with Type
// "audio"); audioData is stored in AudioContent.Data, which is documented as
// holding base64-encoded audio.
func NewToolResultAudio(text, audioData, mimeType string) *CallToolResult {
	return &CallToolResult{
		Content: []Content{
			TextContent{
				Type: "text",
				Text: text,
			},
			AudioContent{
				Type:     "audio",
				Data:     audioData,
				MIMEType: mimeType,
			},
		},
	}
}
Comment on lines +256 to +271
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Fix parameter name in NewToolResultAudio function.

The parameter is named imageData but is being used for audio data, which is confusing and misleading. This should be renamed to match its actual purpose.

-func NewToolResultAudio(text, imageData, mimeType string) *CallToolResult {
+func NewToolResultAudio(text, audioData, mimeType string) *CallToolResult {
    return &CallToolResult{
        Content: []Content{
            TextContent{
                Type: "text",
                Text: text,
            },
            AudioContent{
                Type:     "audio",
-               Data:     imageData,
+               Data:     audioData,
                MIMEType: mimeType,
            },
        },
    }
}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// NewToolResultAudio creates a new CallToolResult with both text and audio content.
// The result holds a TextContent built from text followed by an AudioContent
// built from imageData and mimeType; despite its name, imageData is the value
// stored in AudioContent.Data.
func NewToolResultAudio(text, imageData, mimeType string) *CallToolResult {
return &CallToolResult{
Content: []Content{
TextContent{
Type: "text",
Text: text,
},
AudioContent{
Type: "audio",
Data: imageData,
MIMEType: mimeType,
},
},
}
}
// NewToolResultAudio creates a new CallToolResult with both text and audio content.
// The result holds a TextContent built from text followed by an AudioContent
// built from audioData and mimeType.
func NewToolResultAudio(text, audioData, mimeType string) *CallToolResult {
return &CallToolResult{
Content: []Content{
TextContent{
Type: "text",
Text: text,
},
AudioContent{
Type: "audio",
Data: audioData,
MIMEType: mimeType,
},
},
}
}


// NewToolResultResource creates a new CallToolResult with an embedded resource
func NewToolResultResource(
text string,
Expand Down Expand Up @@ -415,6 +446,14 @@ func ParseContent(contentMap map[string]any) (Content, error) {
}
return NewImageContent(data, mimeType), nil

case "audio":
data := ExtractString(contentMap, "data")
mimeType := ExtractString(contentMap, "mimeType")
if data == "" || mimeType == "" {
return nil, fmt.Errorf("audio data or mimeType is missing")
}
return NewAudioContent(data, mimeType), nil

case "resource":
resourceMap := ExtractMap(contentMap, "resource")
if resourceMap == nil {
Expand Down