Skip to content

Commit d4180dd

Browse files
committed
Add PDF support to OpenAI AsIChatClient
1 parent 083f765 commit d4180dd

File tree

8 files changed

+44
-0
lines changed

8 files changed

+44
-0
lines changed

eng/packages/TestOnly.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
<PackageVersion Include="Moq.AutoMock" Version="3.1.0" />
1919
<PackageVersion Include="Moq" Version="4.18.4" />
2020
<PackageVersion Include="OpenTelemetry.Exporter.InMemory" Version="1.9.0" />
21+
<PackageVersion Include="PdfPig" Version="0.1.10-alpha-20250203-fdb88" />
2122
<PackageVersion Include="Polly.Testing" Version="8.4.2" />
2223
<PackageVersion Include="StrongNamer" Version="0.2.5" />
2324
<PackageVersion Include="System.Configuration.ConfigurationManager" Version="$(SystemConfigurationConfigurationManagerVersion)" />

src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIChatClient.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,10 @@ private static List<ChatMessageContentPart> ToOpenAIChatContent(IList<AIContent>
209209
}
210210

211211
break;
212+
213+
case DataContent dataContent when dataContent.MediaType.StartsWith("application/pdf", StringComparison.OrdinalIgnoreCase):
214+
parts.Add(ChatMessageContentPart.CreateFilePart(BinaryData.FromBytes(dataContent.Data), dataContent.MediaType, $"{Guid.NewGuid():N}.pdf"));
215+
break;
212216
}
213217
}
214218

src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIResponseChatClient.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,11 @@ private static List<ResponseContentPart> ToOpenAIResponsesContent(IList<AIConten
567567
case DataContent dataContent when dataContent.HasTopLevelMediaType("image"):
568568
parts.Add(ResponseContentPart.CreateInputImagePart(BinaryData.FromBytes(dataContent.Data), dataContent.MediaType));
569569
break;
570+
571+
case DataContent dataContent when dataContent.MediaType.StartsWith("application/pdf", StringComparison.OrdinalIgnoreCase):
572+
parts.Add(ResponseContentPart.CreateInputFilePart(null, $"{Guid.NewGuid():N}.pdf",
573+
BinaryData.FromBytes(JsonSerializer.SerializeToUtf8Bytes(dataContent.Uri, ResponseClientJsonContext.Default.String))));
574+
break;
570575
}
571576
}
572577

test/Libraries/Microsoft.Extensions.AI.Evaluation.Integration.Tests/Microsoft.Extensions.AI.Evaluation.Integration.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
<PackageReference Include="Azure.Identity" />
2525
<PackageReference Include="Microsoft.Extensions.Hosting" />
2626
<PackageReference Include="OpenAI" />
27+
<PackageReference Include="PdfPig" />
2728
</ItemGroup>
2829

2930
<ItemGroup>

test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,24 @@ public virtual async Task MultiModal_DescribeImage()
183183
Assert.True(response.Text.IndexOf("net", StringComparison.OrdinalIgnoreCase) >= 0, response.Text);
184184
}
185185

186+
/// <summary>
/// Sends a user message made of a text question plus a PDF attachment and verifies
/// that the reply text contains "hello" (compared case-insensitively).
/// </summary>
[ConditionalFact]
public virtual async Task MultiModal_DescribePdf()
{
    SkipIfNotEnabled();

    // The two content parts of the single user message: the question and the PDF,
    // the latter supplied as a data URI and tagged with the application/pdf media type.
    TextContent question = new("What text does this document contain?");
    DataContent pdfAttachment = new(ImageDataUri.GetPdfDataUri(), "application/pdf");

    // The model id is taken from the same hook the image-description test uses.
    var modelId = GetModel_MultiModal_DescribeImage();

    var response = await _chatClient.GetResponseAsync(
        [new(ChatRole.User, [question, pdfAttachment])],
        new() { ModelId = modelId });

    // The full reply is passed as the failure message so a failing run shows what came back.
    var reply = response.Text;
    bool mentionsHello = reply.IndexOf("hello", StringComparison.OrdinalIgnoreCase) >= 0;
    Assert.True(mentionsHello, reply);
}
203+
186204
[ConditionalFact]
187205
public virtual async Task FunctionInvocation_AutomaticallyInvokeFunction_Parameterless()
188206
{

test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
<PackageReference Include="Microsoft.ML.Tokenizers" />
4444
<PackageReference Include="Microsoft.ML.Tokenizers.Data.O200kBase" />
4545
<PackageReference Include="OpenTelemetry.Exporter.InMemory" />
46+
<PackageReference Include="PdfPig" />
4647
<PackageReference Include="System.Numerics.Tensors" />
4748
</ItemGroup>
4849

test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,6 @@
3333
<ItemGroup>
3434
<PackageReference Include="Azure.AI.OpenAI" />
3535
<PackageReference Include="Azure.Identity" />
36+
<PackageReference Include="PdfPig" />
3637
</ItemGroup>
3738
</Project>

test/Shared/ImageDataUri/ImageDataUri.cs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33

44
using System;
55
using System.IO;
6+
using UglyToad.PdfPig.Content;
7+
using UglyToad.PdfPig.Core;
8+
using UglyToad.PdfPig.Fonts.Standard14Fonts;
9+
using UglyToad.PdfPig.Writer;
610
using Xunit;
711

812
namespace Microsoft.Extensions.AI;
@@ -17,4 +21,13 @@ internal static Uri GetImageDataUri()
1721
s.CopyTo(ms);
1822
return new Uri($"data:image/png;base64,{Convert.ToBase64String(ms.ToArray())}");
1923
}
24+
25+
/// <summary>
/// Builds a single-page A4 PDF containing the text "Hello World!" and returns it
/// as a base64-encoded <c>data:application/pdf</c> URI.
/// </summary>
/// <returns>A data URI whose payload is the generated PDF document.</returns>
internal static Uri GetPdfDataUri()
{
    using var document = new PdfDocumentBuilder();

    // Same call order as before: add the page first, then register the font.
    var firstPage = document.AddPage(PageSize.A4);
    var helvetica = document.AddStandard14Font(Standard14Font.Helvetica);

    // Write the only text on the page at PdfPoint(25, 700).
    var textPosition = new PdfPoint(25, 700);
    firstPage.AddText("Hello World!", 12, textPosition, helvetica);

    // Serialize the document and embed the bytes in the URI as base64.
    var pdfBytes = document.Build();
    var encodedPdf = Convert.ToBase64String(pdfBytes);
    return new Uri($"data:application/pdf;base64,{encodedPdf}");
}
2033
}

0 commit comments

Comments
 (0)