From f7adf81a355f25fe78c4d84adbdf838520e43c41 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 30 Jan 2026 15:08:53 +0000
Subject: [PATCH 1/5] Initial plan
From e6e0de44bbb9d8514fc6f2df39438c7e0c068fae Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 30 Jan 2026 15:15:30 +0000
Subject: [PATCH 2/5] Fix DataUriParser to default to
text/plain;charset=US-ASCII per RFC 2397
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
.../Contents/DataUriParser.cs | 12 ++++-
.../Contents/DataContentTests.cs | 50 +++++++++++++++++++
2 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
index 6afe1409e75..0d6c5b1e1df 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
@@ -23,6 +23,11 @@ internal static class DataUriParser
{
public static string Scheme => "data:";
+ /// <summary>
+ /// The default media type per RFC 2397 when the media type is omitted.
+ /// </summary>
+ public const string DefaultMediaType = "text/plain;charset=US-ASCII";
+
public static DataUri Parse(ReadOnlyMemory<char> dataUri)
{
// Validate, then trim off the "data:" scheme.
@@ -59,9 +64,14 @@ public static DataUri Parse(ReadOnlyMemory<char> dataUri)
}
// Validate the media type, if present.
+ // Per RFC 2397, if the media type is omitted, it defaults to "text/plain;charset=US-ASCII".
ReadOnlySpan<char> span = metadata.Span.Trim();
string? mediaType = null;
- if (!span.IsEmpty && !IsValidMediaType(span, ref mediaType))
+ if (span.IsEmpty)
+ {
+ mediaType = DefaultMediaType;
+ }
+ else if (!IsValidMediaType(span, ref mediaType))
{
throw new UriFormatException("Invalid data URI format: the media type is not a valid.");
}
diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
index d87d776185a..e212cd101f2 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
@@ -109,6 +109,56 @@ public void Ctor_NoMediaType_Roundtrips()
Assert.Equal("aGVsbG8=", content.Base64Data.ToString());
}
+ [Fact]
+ public void Ctor_OmittedMediaType_DefaultsToTextPlain()
+ {
+ // Per RFC 2397, if the media type is omitted, it defaults to "text/plain;charset=US-ASCII"
+ DataContent content;
+
+ // Omitted media type with URL-encoded data
+ content = new DataContent("data:,hello");
+ Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
+ Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray()));
+
+ // Omitted media type with base64-encoded data
+ content = new DataContent("data:;base64,aGVsbG8=");
+ Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
+ Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray()));
+
+ // Omitted media type with URL-encoded data containing special chars
+ content = new DataContent("data:,hello%20world");
+ Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
+ Assert.Equal("hello world", Encoding.UTF8.GetString(content.Data.ToArray()));
+
+ // Omitted media type with empty data
+ content = new DataContent("data:,");
+ Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
+ Assert.Empty(content.Data.ToArray());
+
+ // Omitted media type with empty base64 data
+ content = new DataContent("data:;base64,");
+ Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
+ Assert.Empty(content.Data.ToArray());
+
+ // Omitted media type with Uri constructor
+ content = new DataContent(new Uri("data:,hello"));
+ Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
+ Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray()));
+ }
+
+ [Fact]
+ public void Ctor_OmittedMediaType_CanBeOverridden()
+ {
+ // When media type is omitted in the URI but provided as a parameter, the parameter takes precedence
+ DataContent content;
+
+ content = new DataContent("data:,hello", "application/json");
+ Assert.Equal("application/json", content.MediaType);
+
+ content = new DataContent("data:;base64,aGVsbG8=", "application/octet-stream");
+ Assert.Equal("application/octet-stream", content.MediaType);
+ }
+
[Fact]
public void Serialize_MatchesExpectedJson()
{
From 0c6b61bb679e9c225ca3b6a7063ebf7184a3a824 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 30 Jan 2026 15:20:15 +0000
Subject: [PATCH 3/5] Add text/plain;charset=US-ASCII to known media types for
performance
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
.../Contents/DataUriParser.cs | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
index 0d6c5b1e1df..951aa6b81a1 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
@@ -122,6 +122,7 @@ public static bool IsValidMediaType(ReadOnlySpan mediaTypeSpan, [NotNull]
"text/html" => "text/html",
"text/javascript" => "text/javascript",
"text/plain" => "text/plain",
+ "text/plain;charset=US-ASCII" => DefaultMediaType,
"text/plain;charset=UTF-8" => "text/plain;charset=UTF-8",
"text/xml" => "text/xml",
_ => null,
From f15ed0e260141c9e24cc6f64ef36de08df1c5b8a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 30 Jan 2026 15:33:55 +0000
Subject: [PATCH 4/5] Address review feedback: use DefaultMediaType constant
and refactor tests to Theory
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
.../Contents/DataUriParser.cs | 2 +-
.../Contents/DataContentTests.cs | 57 ++++++-------------
2 files changed, 19 insertions(+), 40 deletions(-)
diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
index 951aa6b81a1..792bb750b5c 100644
--- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
+++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/DataUriParser.cs
@@ -101,6 +101,7 @@ public static bool IsValidMediaType(ReadOnlySpan mediaTypeSpan, [NotNull]
// For common media types, we can avoid both allocating a string for the span and avoid parsing overheads.
string? knownType = mediaTypeSpan switch
{
+ DefaultMediaType => DefaultMediaType,
"application/json" => "application/json",
"application/octet-stream" => "application/octet-stream",
"application/pdf" => "application/pdf",
@@ -122,7 +123,6 @@ public static bool IsValidMediaType(ReadOnlySpan mediaTypeSpan, [NotNull]
"text/html" => "text/html",
"text/javascript" => "text/javascript",
"text/plain" => "text/plain",
- "text/plain;charset=US-ASCII" => DefaultMediaType,
"text/plain;charset=UTF-8" => "text/plain;charset=UTF-8",
"text/xml" => "text/xml",
_ => null,
diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
index e212cd101f2..daf6657b639 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
@@ -109,54 +109,33 @@ public void Ctor_NoMediaType_Roundtrips()
Assert.Equal("aGVsbG8=", content.Base64Data.ToString());
}
- [Fact]
- public void Ctor_OmittedMediaType_DefaultsToTextPlain()
+ [Theory]
+ [InlineData("data:,hello", "hello")]
+ [InlineData("data:;base64,aGVsbG8=", "hello")]
+ [InlineData("data:,hello%20world", "hello world")]
+ [InlineData("data:,", "")]
+ [InlineData("data:;base64,", "")]
+ public void Ctor_OmittedMediaType_DefaultsToTextPlain(string uri, string expectedData)
{
// Per RFC 2397, if the media type is omitted, it defaults to "text/plain;charset=US-ASCII"
- DataContent content;
-
- // Omitted media type with URL-encoded data
- content = new DataContent("data:,hello");
- Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
- Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray()));
-
- // Omitted media type with base64-encoded data
- content = new DataContent("data:;base64,aGVsbG8=");
- Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
- Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray()));
-
- // Omitted media type with URL-encoded data containing special chars
- content = new DataContent("data:,hello%20world");
- Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
- Assert.Equal("hello world", Encoding.UTF8.GetString(content.Data.ToArray()));
-
- // Omitted media type with empty data
- content = new DataContent("data:,");
+ var content = new DataContent(uri);
Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
- Assert.Empty(content.Data.ToArray());
+ Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray()));
- // Omitted media type with empty base64 data
- content = new DataContent("data:;base64,");
+ // Also test with Uri constructor
+ content = new DataContent(new Uri(uri));
Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
- Assert.Empty(content.Data.ToArray());
-
- // Omitted media type with Uri constructor
- content = new DataContent(new Uri("data:,hello"));
- Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
- Assert.Equal("hello", Encoding.UTF8.GetString(content.Data.ToArray()));
+ Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray()));
}
- [Fact]
- public void Ctor_OmittedMediaType_CanBeOverridden()
+ [Theory]
+ [InlineData("data:,hello", "application/json")]
+ [InlineData("data:;base64,aGVsbG8=", "application/octet-stream")]
+ public void Ctor_OmittedMediaType_CanBeOverridden(string uri, string mediaType)
{
// When media type is omitted in the URI but provided as a parameter, the parameter takes precedence
- DataContent content;
-
- content = new DataContent("data:,hello", "application/json");
- Assert.Equal("application/json", content.MediaType);
-
- content = new DataContent("data:;base64,aGVsbG8=", "application/octet-stream");
- Assert.Equal("application/octet-stream", content.MediaType);
+ var content = new DataContent(uri, mediaType);
+ Assert.Equal(mediaType, content.MediaType);
}
[Fact]
From bd543b0191cfe5ebf0861225c51692720ec2be33 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 30 Jan 2026 15:47:11 +0000
Subject: [PATCH 5/5] Refactor test to use static local function for assertions
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
---
.../Contents/DataContentTests.cs | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
index daf6657b639..3e6fa85a489 100644
--- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
+++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/DataContentTests.cs
@@ -118,14 +118,14 @@ public void Ctor_NoMediaType_Roundtrips()
public void Ctor_OmittedMediaType_DefaultsToTextPlain(string uri, string expectedData)
{
// Per RFC 2397, if the media type is omitted, it defaults to "text/plain;charset=US-ASCII"
- var content = new DataContent(uri);
- Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
- Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray()));
-
- // Also test with Uri constructor
- content = new DataContent(new Uri(uri));
- Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
- Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray()));
+ static void Validate(DataContent content, string expectedData)
+ {
+ Assert.Equal("text/plain;charset=US-ASCII", content.MediaType);
+ Assert.Equal(expectedData, Encoding.UTF8.GetString(content.Data.ToArray()));
+ }
+
+ Validate(new DataContent(uri), expectedData);
+ Validate(new DataContent(new Uri(uri)), expectedData);
}
[Theory]