From f7adf81a355f25fe78c4d84adbdf838520e43c41 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:08:53 +0000 Subject: [PATCH 1/5] Initial plan From e6e0de44bbb9d8514fc6f2df39438c7e0c068fae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:15:30 +0000 Subject: [PATCH 2/5] Fix DataUriParser to default to text/plain;charset=US-ASCII per RFC 2397 Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../Contents/DataUriParser.cs | 12 ++++- .../Contents/DataContentTests.cs | 50 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs index 6afe1409e75..0d6c5b1e1df 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs @@ -23,6 +23,11 @@ internal static class DataUriParser { public static string Scheme => "data:"; + /// + /// The default media type per RFC 2397 when the media type is omitted. + /// + public const string DefaultMediaType = "text/plain;charset=US-ASCII"; + public static DataUri Parse(ReadOnlyMemory dataUri) { // Validate, then trim off the "data:" scheme. @@ -59,9 +64,14 @@ public static DataUri Parse(ReadOnlyMemory dataUri) } // Validate the media type, if present. + // Per RFC 2397, if the media type is omitted, it defaults to "text/plain;charset=US-ASCII". ReadOnlySpan span = metadata.Span.Trim(); string? mediaType = null; - if (!span.IsEmpty && !IsValidMediaType(span, ref mediaType)) + if (span.IsEmpty) + { + mediaType = DefaultMediaType; + } + else if (!IsValidMediaType(span, ref mediaType)) { throw new UriFormatException("Invalid data URI format: the media type is not a valid."); } diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs index d87d776185a..e212cd101f2 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs @@ -109,6 +109,56 @@ public void Ctor_NoMediaType_Roundtrips() Assert.Equal("aGVsbG8=", content.Base64Data.ToString()); } + [Fact] + public void Ctor_OmittedMediaType_DefaultsToTextPlain() + { + // Per RFC 2397, if the media type is omitted, it defaults to "text/plain;charset=US-ASCII" + DataContent content; + + // Omitted media type with URL-encoded data + content = new DataContent("data:,hello"); + Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); + Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray())); + + // Omitted media type with base64-encoded data + content = new DataContent("data:;base64,aGVsbG8="); + Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); + Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray())); + + // Omitted media type with URL-encoded data containing special chars + content = new DataContent("data:,hello%20world"); + Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); + Assert.Equal("hello world", Encoding.UTF8.GetString(content.Data.ToArray())); + + // Omitted media type with empty data + content = new DataContent("data:,"); + Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); + Assert.Empty(content.Data.ToArray()); + + // Omitted media type with empty base64 data + content = new DataContent("data:;base64,"); + Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); + Assert.Empty(content.Data.ToArray()); + + // Omitted media type with Uri constructor + content = new DataContent(new Uri("data:,hello")); + Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); + Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray())); + } + + [Fact] + public void Ctor_OmittedMediaType_CanBeOverridden() + { + // When media type is omitted in the URI but provided as a parameter, the parameter takes precedence + DataContent content; + + content = new DataContent("data:,hello", "application/json"); + Assert.Equal("application/json", content.MediaType); + + content = new DataContent("data:;base64,aGVsbG8=", "application/octet-stream"); + Assert.Equal("application/octet-stream", content.MediaType); + } + [Fact] public void Serialize_MatchesExpectedJson() { From 0c6b61bb679e9c225ca3b6a7063ebf7184a3a824 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:20:15 +0000 Subject: [PATCH 3/5] Add text/plain;charset=US-ASCII to known media types for performance Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../Contents/DataUriParser.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs index 0d6c5b1e1df..951aa6b81a1 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs @@ -122,6 +122,7 @@ public static bool IsValidMediaType(ReadOnlySpan mediaTypeSpan, [NotNull] "text/html" => "text/html", "text/javascript" => "text/javascript", "text/plain" => "text/plain", + "text/plain;charset=US-ASCII" => DefaultMediaType, "text/plain;charset=UTF-8" => "text/plain;charset=UTF-8", "text/xml" => "text/xml", _ => null, From f15ed0e260141c9e24cc6f64ef36de08df1c5b8a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:33:55 +0000 Subject: [PATCH 4/5] Address review feedback: use DefaultMediaType constant and refactor tests to Theory Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../Contents/DataUriParser.cs | 2 +- .../Contents/DataContentTests.cs | 57 ++++++------------- 2 files changed, 19 insertions(+), 40 deletions(-) diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs index 951aa6b81a1..792bb750b5c 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs @@ -101,6 +101,7 @@ public static bool IsValidMediaType(ReadOnlySpan mediaTypeSpan, [NotNull] // For common media types, we can avoid both allocating a string for the span and avoid parsing overheads. string? knownType = mediaTypeSpan switch { + DefaultMediaType => DefaultMediaType, "application/json" => "application/json", "application/octet-stream" => "application/octet-stream", "application/pdf" => "application/pdf", @@ -122,7 +123,6 @@ public static bool IsValidMediaType(ReadOnlySpan mediaTypeSpan, [NotNull] "text/html" => "text/html", "text/javascript" => "text/javascript", "text/plain" => "text/plain", - "text/plain;charset=US-ASCII" => DefaultMediaType, "text/plain;charset=UTF-8" => "text/plain;charset=UTF-8", "text/xml" => "text/xml", _ => null, diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs index e212cd101f2..daf6657b639 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs @@ -109,54 +109,33 @@ public void Ctor_NoMediaType_Roundtrips() Assert.Equal("aGVsbG8=", content.Base64Data.ToString()); } - [Fact] - public void Ctor_OmittedMediaType_DefaultsToTextPlain() + [Theory] + [InlineData("data:,hello", "hello")] + [InlineData("data:;base64,aGVsbG8=", "hello")] + [InlineData("data:,hello%20world", "hello world")] + [InlineData("data:,", "")] + [InlineData("data:;base64,", "")] + public void Ctor_OmittedMediaType_DefaultsToTextPlain(string uri, string expectedData) { // Per RFC 2397, if the media type is omitted, it defaults to "text/plain;charset=US-ASCII" - DataContent content; - - // Omitted media type with URL-encoded data - content = new DataContent("data:,hello"); - Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); - Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray())); - - // Omitted media type with base64-encoded data - content = new DataContent("data:;base64,aGVsbG8="); - Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); - Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray())); - - // Omitted media type with URL-encoded data containing special chars - content = new DataContent("data:,hello%20world"); - Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); - Assert.Equal("hello world", Encoding.UTF8.GetString(content.Data.ToArray())); - - // Omitted media type with empty data - content = new DataContent("data:,"); + var content = new DataContent(uri); Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); - Assert.Empty(content.Data.ToArray()); + Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray())); - // Omitted media type with empty base64 data - content = new DataContent("data:;base64,"); + // Also test with Uri constructor + content = new DataContent(new Uri(uri)); Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); - Assert.Empty(content.Data.ToArray()); - - // Omitted media type with Uri constructor - content = new DataContent(new Uri("data:,hello")); - Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); - Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray())); + Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray())); } - [Fact] - public void Ctor_OmittedMediaType_CanBeOverridden() + [Theory] + [InlineData("data:,hello", "application/json")] + [InlineData("data:;base64,aGVsbG8=", "application/octet-stream")] + public void Ctor_OmittedMediaType_CanBeOverridden(string uri, string mediaType) { // When media type is omitted in the URI but provided as a parameter, the parameter takes precedence - DataContent content; - - content = new DataContent("data:,hello", "application/json"); - Assert.Equal("application/json", content.MediaType); - - content = new DataContent("data:;base64,aGVsbG8=", "application/octet-stream"); - Assert.Equal("application/octet-stream", content.MediaType); + var content = new DataContent(uri, mediaType); + Assert.Equal(mediaType, content.MediaType); } [Fact] From bd543b0191cfe5ebf0861225c51692720ec2be33 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:47:11 +0000 Subject: [PATCH 5/5] Refactor test to use static local function for assertions Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- .../Contents/DataContentTests.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs index daf6657b639..3e6fa85a489 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs @@ -118,14 +118,14 @@ public void Ctor_NoMediaType_Roundtrips() public void Ctor_OmittedMediaType_DefaultsToTextPlain(string uri, string expectedData) { // Per RFC 2397, if the media type is omitted, it defaults to "text/plain;charset=US-ASCII" - var content = new DataContent(uri); - Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); - Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray())); - - // Also test with Uri constructor - content = new DataContent(new Uri(uri)); - Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); - Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray())); + static void Validate(DataContent content, string expectedData) + { + Assert.Equal("text/plain;charset=US-ASCII", content.MediaType); + Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray())); + } + + Validate(new DataContent(uri), expectedData); + Validate(new DataContent(new Uri(uri)), expectedData); } [Theory]