Skip to content

Commit 101f482

Browse files
committed
Squash changes
1 parent 85ad929 commit 101f482

File tree

10 files changed

+240
-37
lines changed

10 files changed

+240
-37
lines changed

dotnet/src/AutoGen.Anthropic/AnthropicClient.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ public sealed class AnthropicClient : IDisposable
2323

2424
private static readonly JsonSerializerOptions JsonSerializerOptions = new()
2525
{
26-
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
26+
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
27+
Converters = { new ContentBaseConverter() }
2728
};
2829

2930
private static readonly JsonSerializerOptions JsonDeserializerOptions = new()

dotnet/src/AutoGen.Anthropic/DTO/ChatCompletionRequest.cs

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22

33
using System.Text.Json.Serialization;
4+
using System.Collections.Generic;
45

56
namespace AutoGen.Anthropic.DTO;
67

7-
using System.Collections.Generic;
8-
98
public class ChatCompletionRequest
109
{
1110
[JsonPropertyName("model")]
@@ -50,9 +49,15 @@ public class ChatMessage
5049
public string Role { get; set; }
5150

5251
[JsonPropertyName("content")]
53-
public string Content { get; set; }
52+
public List<ContentBase> Content { get; set; }
5453

5554
/// <summary>
/// Creates a chat message whose content is a single text block.
/// </summary>
/// <param name="role">Value assigned to <see cref="Role"/>.</param>
/// <param name="content">Text wrapped in one <see cref="TextContent"/> entry of <see cref="Content"/>.</param>
public ChatMessage(string role, string content)
{
    var textBlock = new TextContent { Text = content };
    var blocks = new List<ContentBase>();
    blocks.Add(textBlock);

    Content = blocks;
    Role = role;
}
59+
60+
public ChatMessage(string role, List<ContentBase> content)
5661
{
5762
Role = role;
5863
Content = content;

dotnet/src/AutoGen.Anthropic/Middleware/AnthropicMessageConnector.cs

+91-21
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System;
55
using System.Collections.Generic;
66
using System.Linq;
7+
using System.Net.Http;
78
using System.Runtime.CompilerServices;
89
using System.Threading;
910
using System.Threading.Tasks;
@@ -19,7 +20,7 @@ public class AnthropicMessageConnector : IStreamingMiddleware
1920
public async Task<IMessage> InvokeAsync(MiddlewareContext context, IAgent agent, CancellationToken cancellationToken = default)
2021
{
2122
var messages = context.Messages;
22-
var chatMessages = ProcessMessage(messages, agent);
23+
var chatMessages = await ProcessMessageAsync(messages, agent);
2324
var response = await agent.GenerateReplyAsync(chatMessages, context.Options, cancellationToken);
2425

2526
return response is IMessage<ChatCompletionResponse> chatMessage
@@ -31,7 +32,7 @@ public async IAsyncEnumerable<IStreamingMessage> InvokeAsync(MiddlewareContext c
3132
[EnumeratorCancellation] CancellationToken cancellationToken = default)
3233
{
3334
var messages = context.Messages;
34-
var chatMessages = ProcessMessage(messages, agent);
35+
var chatMessages = await ProcessMessageAsync(messages, agent);
3536

3637
await foreach (var reply in agent.GenerateStreamingReplyAsync(chatMessages, context.Options, cancellationToken))
3738
{
@@ -53,60 +54,78 @@ public async IAsyncEnumerable<IStreamingMessage> InvokeAsync(MiddlewareContext c
5354
/// <summary>
/// Converts one streamed completion chunk into a text update.
/// </summary>
/// <param name="chatMessage">Streaming envelope whose content carries the delta.</param>
/// <param name="agent">Agent whose name is recorded as the sender of the update.</param>
/// <returns>
/// A <see cref="TextMessageUpdate"/> with the assistant role when the chunk has a
/// non-empty delta text; otherwise <c>null</c>.
/// </returns>
private IStreamingMessage? ProcessChatCompletionResponse(
    IStreamingMessage<ChatCompletionResponse> chatMessage,
    IStreamingAgent agent)
{
    var chunk = chatMessage.Content.Delta;

    // Chunks without a delta, or with an empty delta text, produce no update.
    if (chunk == null || string.IsNullOrEmpty(chunk.Text))
    {
        return null;
    }

    return new TextMessageUpdate(role: Role.Assistant, chunk.Text, from: agent.Name);
}
6162

62-
private IEnumerable<IMessage> ProcessMessage(IEnumerable<IMessage> messages, IAgent agent)
63+
/// <summary>
/// Converts the incoming messages, in order, into the form sent to the agent.
/// </summary>
/// <remarks>
/// Text messages go through <c>ProcessTextMessage</c>, image messages are wrapped as a
/// "user" <see cref="ChatMessage"/> holding a single <see cref="ImageContent"/>,
/// multi-modal messages go through <c>ProcessMultiModalMessageAsync</c>, and every other
/// message is passed through unchanged.
/// </remarks>
/// <param name="messages">Messages to convert.</param>
/// <param name="agent">Agent used to resolve roles and sender names.</param>
/// <returns>One output message per input message.</returns>
private async Task<IEnumerable<IMessage>> ProcessMessageAsync(IEnumerable<IMessage> messages, IAgent agent)
{
    var converted = new List<IMessage>();

    foreach (var current in messages)
    {
        switch (current)
        {
            case TextMessage text:
                converted.Add(ProcessTextMessage(text, agent));
                break;

            case ImageMessage image:
                {
                    var imageSource = await ProcessImageSourceAsync(image);
                    var imageBlocks = new ContentBase[] { new ImageContent { Source = imageSource } }.ToList();
                    converted.Add(new MessageEnvelope<ChatMessage>(new ChatMessage("user", imageBlocks), from: agent.Name));
                    break;
                }

            case MultiModalMessage multiModal:
                converted.Add(await ProcessMultiModalMessageAsync(multiModal, agent));
                break;

            default:
                // Unrecognised message types are forwarded as-is.
                converted.Add(current);
                break;
        }
    }

    return converted;
}
7388

7489
/// <summary>
/// Turns a completed response into a single assistant <see cref="TextMessage"/>.
/// </summary>
/// <param name="response">Completion response; must hold exactly one content item.</param>
/// <param name="from">Agent whose name is recorded as the sender.</param>
/// <returns>A text message carrying the item's text, or an empty string when the text is null.</returns>
/// <exception cref="ArgumentNullException">The response has no content list.</exception>
/// <exception cref="NotSupportedException">The content list does not hold exactly one item.</exception>
private IMessage PostProcessMessage(ChatCompletionResponse response, IAgent from)
{
    var blocks = response.Content;

    if (blocks is null)
    {
        throw new ArgumentNullException(nameof(response.Content));
    }

    if (blocks.Count != 1)
    {
        throw new NotSupportedException($"{nameof(response.Content)} != 1");
    }

    // The single item is cast directly to TextContent; any other content type fails the cast.
    var onlyBlock = (TextContent)blocks[0];
    var replyText = onlyBlock.Text ?? string.Empty;

    return new TextMessage(Role.Assistant, replyText, from: from.Name);
}
84103

85-
private IEnumerable<IMessage<ChatMessage>> ProcessTextMessage(TextMessage textMessage, IAgent agent)
104+
private IMessage<ChatMessage> ProcessTextMessage(TextMessage textMessage, IAgent agent)
86105
{
87-
IEnumerable<ChatMessage> messages;
106+
ChatMessage messages;
88107

89108
if (textMessage.From == agent.Name)
90109
{
91-
messages = [new ChatMessage(
92-
"assistant", textMessage.Content)];
110+
messages = new ChatMessage(
111+
"assistant", textMessage.Content);
93112
}
94113
else if (textMessage.From is null)
95114
{
96115
if (textMessage.Role == Role.User)
97116
{
98-
messages = [new ChatMessage(
99-
"user", textMessage.Content)];
117+
messages = new ChatMessage(
118+
"user", textMessage.Content);
100119
}
101120
else if (textMessage.Role == Role.Assistant)
102121
{
103-
messages = [new ChatMessage(
104-
"assistant", textMessage.Content)];
122+
messages = new ChatMessage(
123+
"assistant", textMessage.Content);
105124
}
106125
else if (textMessage.Role == Role.System)
107126
{
108-
messages = [new ChatMessage(
109-
"system", textMessage.Content)];
127+
messages = new ChatMessage(
128+
"system", textMessage.Content);
110129
}
111130
else
112131
{
@@ -116,10 +135,61 @@ private IEnumerable<IMessage<ChatMessage>> ProcessTextMessage(TextMessage textMe
116135
else
117136
{
118137
// if from is not null, then the message is from user
119-
messages = [new ChatMessage(
120-
"user", textMessage.Content)];
138+
messages = new ChatMessage(
139+
"user", textMessage.Content);
121140
}
122141

123-
return messages.Select(m => new MessageEnvelope<ChatMessage>(m, from: textMessage.From));
142+
return new MessageEnvelope<ChatMessage>(messages, from: textMessage.From);
143+
}
144+
145+
private async Task<IMessage> ProcessMultiModalMessageAsync(MultiModalMessage multiModalMessage, IAgent agent)
146+
{
147+
var content = new List<ContentBase>();
148+
foreach (var message in multiModalMessage.Content)
149+
{
150+
switch (message)
151+
{
152+
case TextMessage textMessage when textMessage.GetContent() is not null:
153+
content.Add(new TextContent { Text = textMessage.GetContent() });
154+
break;
155+
case ImageMessage imageMessage:
156+
content.Add(new ImageContent() { Source = await ProcessImageSourceAsync(imageMessage) });
157+
break;
158+
}
159+
}
160+
161+
var chatMessage = new ChatMessage("user", content);
162+
return MessageEnvelope.Create(chatMessage, agent.Name);
163+
}
164+
165+
private async Task<ImageSource> ProcessImageSourceAsync(ImageMessage imageMessage)
166+
{
167+
if (imageMessage.Data != null)
168+
{
169+
return new ImageSource
170+
{
171+
MediaType = imageMessage.Data.MediaType,
172+
Data = Convert.ToBase64String(imageMessage.Data.ToArray())
173+
};
174+
}
175+
176+
if (imageMessage.Url is null)
177+
{
178+
throw new InvalidOperationException("Invalid ImageMessage, the data or url must be provided");
179+
}
180+
181+
var uri = new Uri(imageMessage.Url);
182+
using var client = new HttpClient();
183+
var response = client.GetAsync(uri).Result;
184+
if (!response.IsSuccessStatusCode)
185+
{
186+
throw new HttpRequestException($"Failed to download the image from {uri}");
187+
}
188+
189+
return new ImageSource
190+
{
191+
MediaType = "image/jpeg",
192+
Data = Convert.ToBase64String(await response.Content.ReadAsByteArrayAsync())
193+
};
124194
}
125195
}

dotnet/src/AutoGen.Core/Agent/MiddlewareStreamingAgent.cs

-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ public Task<IMessage> GenerateReplyAsync(IEnumerable<IMessage> messages, Generat
4949

5050
/// <summary>
/// Streams a reply by forwarding the call, with all arguments unchanged, to the wrapped agent.
/// </summary>
/// <param name="messages">Conversation messages to reply to.</param>
/// <param name="options">Optional generation options.</param>
/// <param name="cancellationToken">Token forwarded to the wrapped agent.</param>
/// <returns>The streaming sequence returned by the wrapped agent.</returns>
public IAsyncEnumerable<IStreamingMessage> GenerateStreamingReplyAsync(
    IEnumerable<IMessage> messages,
    GenerateReplyOptions? options = null,
    CancellationToken cancellationToken = default)
    => _agent.GenerateStreamingReplyAsync(messages, options, cancellationToken);
5554

Original file line numberDiff line numberDiff line change
@@ -1,31 +1,108 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// AnthropicClientAgentTest.cs
33

4+
using AutoGen.Anthropic.DTO;
45
using AutoGen.Anthropic.Extensions;
56
using AutoGen.Anthropic.Utils;
7+
using AutoGen.Core;
68
using AutoGen.Tests;
7-
using Xunit.Abstractions;
9+
using FluentAssertions;
810

9-
namespace AutoGen.Anthropic;
11+
namespace AutoGen.Anthropic.Tests;
1012

1113
/// <summary>
/// Tests for <see cref="AnthropicClientAgent"/> with the message connector registered.
/// Each test builds a client against <c>AnthropicConstants.Endpoint</c> and is marked with
/// <c>ApiKeyFact("ANTHROPIC_API_KEY")</c> (presumably skipped when the key is not set — confirm
/// against the attribute's implementation).
/// </summary>
public class AnthropicClientAgentTest
{
    /// <summary>
    /// Sends a lower-case text message and expects the upper-cased text in the reply.
    /// </summary>
    [ApiKeyFact("ANTHROPIC_API_KEY")]
    public async Task AnthropicAgentChatCompletionTestAsync()
    {
        var anthropicClient = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey);

        var upperCaseAgent = new AnthropicClientAgent(
                anthropicClient,
                name: "AnthropicAgent",
                AnthropicConstants.Claude3Haiku,
                systemMessage: "You are a helpful AI assistant that convert user message to upper case")
            .RegisterMessageConnector();

        var lowerCaseInput = new TextMessage(Role.User, "abcdefg");
        var history = new[] { lowerCaseInput };

        var answer = await upperCaseAgent.SendAsync(chatHistory: history);

        answer.GetContent().Should().Contain("ABCDEFG");
        answer.From.Should().Be(upperCaseAgent.Name);
    }

    /// <summary>
    /// Streams a reply to a pre-built image <see cref="ChatMessage"/> and checks every update.
    /// </summary>
    [ApiKeyFact("ANTHROPIC_API_KEY")]
    public async Task AnthropicAgentTestProcessImageAsync()
    {
        var anthropicClient = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey);
        var visionAgent = new AnthropicClientAgent(
            anthropicClient,
            name: "AnthropicAgent",
            AnthropicConstants.Claude3Haiku).RegisterMessageConnector();

        var encodedImage = await AnthropicTestUtils.Base64FromImageAsync("square.png");
        var imageSource = new ImageSource { MediaType = "image/png", Data = encodedImage };
        var imageChatMessage = new ChatMessage("user", [new ImageContent { Source = imageSource }]);

        var envelopes = new IMessage[] { MessageEnvelope.Create(imageChatMessage) };

        // test streaming
        foreach (var envelope in envelopes)
        {
            var updates = visionAgent.GenerateStreamingReplyAsync([envelope]);

            await foreach (var update in updates)
            {
                update.Should().BeOfType<TextMessageUpdate>();
                update.As<TextMessageUpdate>().From.Should().Be(visionAgent.Name);
            }
        }
    }

    /// <summary>
    /// Sends a text-plus-image multi-modal message and expects a non-empty assistant text reply.
    /// </summary>
    [ApiKeyFact("ANTHROPIC_API_KEY")]
    public async Task AnthropicAgentTestMultiModalAsync()
    {
        var anthropicClient = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey);
        var visionAgent = new AnthropicClientAgent(
                anthropicClient,
                name: "AnthropicAgent",
                AnthropicConstants.Claude3Haiku)
            .RegisterMessageConnector();

        var imagePath = Path.Combine("images", "square.png");
        var imageBytes = await File.ReadAllBytesAsync(imagePath);
        var picture = new ImageMessage(Role.User, BinaryData.FromBytes(imageBytes, "image/png"));
        var question = new TextMessage(Role.User, "What's in this image?");
        var combined = new MultiModalMessage(Role.User, [question, picture]);

        var answer = await visionAgent.SendAsync(combined);

        answer.Should().BeOfType<TextMessage>();
        answer.GetRole().Should().Be(Role.Assistant);
        answer.GetContent().Should().NotBeNullOrEmpty();
        answer.From.Should().Be(visionAgent.Name);
    }

    /// <summary>
    /// Sends a single binary image message and expects a non-empty assistant text reply.
    /// </summary>
    [ApiKeyFact("ANTHROPIC_API_KEY")]
    public async Task AnthropicAgentTestImageMessageAsync()
    {
        var anthropicClient = new AnthropicClient(new HttpClient(), AnthropicConstants.Endpoint, AnthropicTestUtils.ApiKey);
        var describingAgent = new AnthropicClientAgent(
                anthropicClient,
                name: "AnthropicAgent",
                AnthropicConstants.Claude3Haiku,
                systemMessage: "You are a helpful AI assistant that is capable of determining what an image is. Tell me a brief description of the image."
            )
            .RegisterMessageConnector();

        var imagePath = Path.Combine("images", "square.png");
        var imageBytes = await File.ReadAllBytesAsync(imagePath);
        var picture = new ImageMessage(Role.User, BinaryData.FromBytes(imageBytes, "image/png"));

        var answer = await describingAgent.SendAsync(picture);

        answer.Should().BeOfType<TextMessage>();
        answer.GetRole().Should().Be(Role.Assistant);
        answer.GetContent().Should().NotBeNullOrEmpty();
        answer.From.Should().Be(describingAgent.Name);
    }
}

0 commit comments

Comments
 (0)