Reviewer note (free text ahead of the first "diff --git" header; patch tools skip it).
 * OpenAIResponsesChatClient.ToAIContents: an input_image part that only carries an image URL is
   surfaced as DataContent (for "data:" URIs) or UriContent (for other absolute URIs). The URL is
   read via reflection, guarded so it is only attempted on parts of the reflected type.
 * OpenAIResponseClientTests: adds one test per URL flavor.
 * Leading context of the first hunk was unrecoverable and has been trimmed; verify against the tree.
diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIResponsesChatClient.cs b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIResponsesChatClient.cs
index b9aca3161a0..5fb6ac1935a 100644
--- a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIResponsesChatClient.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIResponsesChatClient.cs
@@ -48,4 +48,10 @@ private static readonly Func
 
+    // Workaround for https://github.com/openai/openai-dotnet/pull/874.
+    // The OpenAI library doesn't yet expose InputImageUrl as a public property, so we access it via reflection.
+    // Replace this with the actual public property once it's available (e.g., part.InputImageUrl).
+    private static readonly PropertyInfo? _inputImageUrlProperty =
+        Type.GetType("OpenAI.Responses.InternalItemContentInputImage, OpenAI")?.GetProperty("ImageUrl");
+
     /// <summary>Metadata about the client.</summary>
     private readonly ChatClientMetadata _metadata;
 
@@ -1192,11 +1198,41 @@ private static List<AIContent> ToAIContents(IEnumerable<ResponseContentPart> con
                     break;
 
                 case ResponseContentPartKind.InputFile or ResponseContentPartKind.InputImage:
-                    content =
-                        !string.IsNullOrWhiteSpace(part.InputImageFileId) ? new HostedFileContent(part.InputImageFileId) { MediaType = "image/*" } :
-                        !string.IsNullOrWhiteSpace(part.InputFileId) ? new HostedFileContent(part.InputFileId) { Name = part.InputFilename } :
-                        part.InputFileBytes is not null ? new DataContent(part.InputFileBytes, part.InputFileBytesMediaType ?? "application/octet-stream") { Name = part.InputFilename } :
-                        null;
+                    if (!string.IsNullOrWhiteSpace(part.InputImageFileId))
+                    {
+                        content = new HostedFileContent(part.InputImageFileId) { MediaType = "image/*" };
+                    }
+                    else if (!string.IsNullOrWhiteSpace(part.InputFileId))
+                    {
+                        content = new HostedFileContent(part.InputFileId) { Name = part.InputFilename };
+                    }
+                    else if (part.InputFileBytes is not null)
+                    {
+                        content = new DataContent(part.InputFileBytes, part.InputFileBytesMediaType ?? "application/octet-stream") { Name = part.InputFilename };
+                    }
+                    // Only reflect on parts that are instances of the property's declaring type:
+                    // PropertyInfo.GetValue throws TargetException for any other object (e.g. an input_file part).
+                    else if (_inputImageUrlProperty is { DeclaringType: { } imageUrlOwner } imageUrlProperty &&
+                        imageUrlOwner.IsInstanceOfType(part) && imageUrlProperty.GetValue(part) is string inputImageUrl)
+                    {
+                        if (inputImageUrl.StartsWith("data:", StringComparison.OrdinalIgnoreCase))
+                        {
+                            content = new DataContent(inputImageUrl);
+                        }
+                        else if (Uri.TryCreate(inputImageUrl, UriKind.Absolute, out Uri? imageUri))
+                        {
+                            content = new UriContent(imageUri, "image/*");
+                        }
+                        else
+                        {
+                            content = null;
+                        }
+                    }
+                    else
+                    {
+                        content = null;
+                    }
+
                     break;
 
                 case ResponseContentPartKind.Refusal:
diff --git a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAIResponseClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAIResponseClientTests.cs
index c84eba42fa1..d1a6749450b 100644
--- a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAIResponseClientTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAIResponseClientTests.cs
@@ -5198,6 +5198,132 @@ public async Task ResponseWithRefusalContent_ParsesCorrectly()
         Assert.Equal("Refusal", errorContent.ErrorCode);
     }
 
+    [Fact]
+    public async Task ResponseWithInputImageHttpUrl_ParsesAsUriContent()
+    {
+        const string Input = """
+            {
+              "model":"gpt-4o-mini",
+              "input":[{"type":"message","role":"user","content":[{"type":"input_text","text":"What is in this image?"}]}]
+            }
+            """;
+
+        // The output includes a message with input_image content that has an image_url property with HTTP URL.
+        const string Output = """
+            {
+              "id":"resp_001",
+              "object":"response",
+              "created_at":1741892091,
+              "status":"completed",
+              "model":"gpt-4o-mini",
+              "output":[
+                {
+                  "type":"message",
+                  "id":"msg_001",
+                  "status":"completed",
+                  "role":"user",
+                  "content":[
+                    {"type":"input_image","image_url":"https://example.com/image.png"}
+                  ]
+                },
+                {
+                  "type":"message",
+                  "id":"msg_002",
+                  "status":"completed",
+                  "role":"assistant",
+                  "content":[
+                    {"type":"output_text","text":"This is a cat.","annotations":[]}
+                  ]
+                }
+              ]
+            }
+            """;
+
+        using VerbatimHttpHandler handler = new(Input, Output);
+        using HttpClient httpClient = new(handler);
+        using IChatClient client = CreateResponseClient(httpClient, "gpt-4o-mini");
+
+        var response = await client.GetResponseAsync("What is in this image?");
+
+        Assert.NotNull(response);
+
+        var userMessage = response.Messages.FirstOrDefault(m => m.Role == ChatRole.User);
+        Assert.NotNull(userMessage);
+
+        // HTTP URL should be returned as UriContent
+        var imageContent = userMessage.Contents.OfType<UriContent>().FirstOrDefault();
+        Assert.NotNull(imageContent);
+        Assert.Equal("https://example.com/image.png", imageContent.Uri.ToString());
+        Assert.Equal("image/*", imageContent.MediaType);
+
+        var assistantMessage = response.Messages.LastOrDefault(m => m.Role == ChatRole.Assistant);
+        Assert.NotNull(assistantMessage);
+        Assert.Equal("This is a cat.", assistantMessage.Text);
+    }
+
+    [Fact]
+    public async Task ResponseWithInputImageDataUri_ParsesAsDataContent()
+    {
+        const string Input = """
+            {
+              "model":"gpt-4o-mini",
+              "input":[{"type":"message","role":"user","content":[{"type":"input_text","text":"What is in this image?"}]}]
+            }
+            """;
+
+        // The output includes a message with input_image content that has an image_url property with a data URI.
+        const string Output = """
+            {
+              "id":"resp_001",
+              "object":"response",
+              "created_at":1741892091,
+              "status":"completed",
+              "model":"gpt-4o-mini",
+              "output":[
+                {
+                  "type":"message",
+                  "id":"msg_001",
+                  "status":"completed",
+                  "role":"user",
+                  "content":[
+                    {"type":"input_image","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="}
+                  ]
+                },
+                {
+                  "type":"message",
+                  "id":"msg_002",
+                  "status":"completed",
+                  "role":"assistant",
+                  "content":[
+                    {"type":"output_text","text":"This is a red pixel.","annotations":[]}
+                  ]
+                }
+              ]
+            }
+            """;
+
+        using VerbatimHttpHandler handler = new(Input, Output);
+        using HttpClient httpClient = new(handler);
+        using IChatClient client = CreateResponseClient(httpClient, "gpt-4o-mini");
+
+        var response = await client.GetResponseAsync("What is in this image?");
+
+        Assert.NotNull(response);
+
+        var userMessage = response.Messages.FirstOrDefault(m => m.Role == ChatRole.User);
+        Assert.NotNull(userMessage);
+
+        // Data URI should be returned as DataContent
+        var imageContent = userMessage.Contents.OfType<DataContent>().FirstOrDefault();
+        Assert.NotNull(imageContent);
+        Assert.Equal("image/png", imageContent.MediaType);
+        Assert.True(imageContent.Data.Length > 0);
+
+        var assistantMessage = response.Messages.LastOrDefault(m => m.Role == ChatRole.Assistant);
+        Assert.NotNull(assistantMessage);
+        Assert.Equal("This is a red pixel.", assistantMessage.Text);
+    }
+
     [Fact]
     public async Task HostedImageGenerationTool_NonStreaming()
     {