Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
658 changes: 658 additions & 0 deletions docs/decisions/0016-structured-output.md

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions dotnet/agent-framework-dotnet.slnx
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,9 @@
<File Path="src/Shared/Throw/README.md" />
<File Path="src/Shared/Throw/Throw.cs" />
</Folder>
<Folder Name="/Solution Items/src/Shared/StructuredOutput/">
<File Path="src/Shared/StructuredOutput/StructuredOutputSchemaUtilities.cs" />
</Folder>
<Folder Name="/Solution Items/tests/">
<File Path="tests/.editorconfig" />
<File Path="tests/Directory.Build.props" />
Expand Down
3 changes: 3 additions & 0 deletions dotnet/eng/MSBuild/Shared.props
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@
<ItemGroup Condition="'$(InjectSharedFoundryAgents)' == 'true'">
<Compile Include="$(MSBuildThisFileDirectory)\..\..\src\Shared\Foundry\Agents\*.cs" LinkBase="Shared\Foundry" />
</ItemGroup>
<ItemGroup Condition="'$(InjectSharedStructuredOutput)' == 'true'">
<Compile Include="$(MSBuildThisFileDirectory)\..\..\src\Shared\StructuredOutput\*.cs" LinkBase="Shared\StructuredOutput" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ protected override async IAsyncEnumerable<AgentResponseUpdate> RunCoreStreamingA

var response = allUpdates.ToAgentResponse();

if (response.TryDeserialize(this._jsonSerializerOptions, out JsonElement stateSnapshot))
if (TryDeserialize(response.Text, this._jsonSerializerOptions, out JsonElement stateSnapshot))
{
byte[] stateBytes = JsonSerializer.SerializeToUtf8Bytes(
stateSnapshot,
Expand All @@ -103,4 +103,25 @@ protected override async IAsyncEnumerable<AgentResponseUpdate> RunCoreStreamingA
yield return update;
}
}

// Attempts to deserialize <paramref name="json"/> into <typeparamref name="T"/> without ever throwing.
// Returns true and sets structuredOutput only when deserialization succeeds with a non-null value;
// any exception raised by the serializer, or a null result, yields false with structuredOutput left at default.
private static bool TryDeserialize<T>(string json, JsonSerializerOptions jsonSerializerOptions, out T structuredOutput)
{
    // Start from the failure value so every non-success path can simply fall through.
    structuredOutput = default!;

    try
    {
        T? parsed = JsonSerializer.Deserialize<T>(json, jsonSerializerOptions);
        if (parsed is not null)
        {
            structuredOutput = parsed;
            return true;
        }
    }
    catch
    {
        // Best-effort by design: any failure while deserializing is reported as "false" rather than propagated.
    }

    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ stateObj is not JsonElement state ||
var response = allUpdates.ToAgentResponse();

// Try to deserialize the structured state response
if (response.TryDeserialize(this._jsonSerializerOptions, out JsonElement stateSnapshot))
if (TryDeserialize(response.Text, this._jsonSerializerOptions, out JsonElement stateSnapshot))
{
// Serialize and emit as STATE_SNAPSHOT via DataContent
byte[] stateBytes = JsonSerializer.SerializeToUtf8Bytes(
Expand All @@ -134,4 +134,25 @@ stateObj is not JsonElement state ||
yield return update;
}
}

// Non-throwing deserialization helper: converts <paramref name="json"/> to <typeparamref name="T"/>.
// Success (true) requires that the serializer neither throws nor produces null; otherwise the
// out parameter is set to default and false is returned.
private static bool TryDeserialize<T>(string json, JsonSerializerOptions jsonSerializerOptions, out T structuredOutput)
{
    T? candidate = default;
    bool succeeded = false;

    try
    {
        candidate = JsonSerializer.Deserialize<T>(json, jsonSerializerOptions);
        succeeded = candidate is not null;
    }
    catch
    {
        // Intentionally swallowed: a failed parse is signalled through the return value only.
        // 'succeeded' is still false here because the assignment above never ran.
    }

    structuredOutput = succeeded ? candidate! : default!;
    return succeeded;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.DependencyInjection;

namespace SampleApp;

/// <summary>
/// Provides extension methods for adding structured output capabilities to <see cref="AIAgentBuilder"/> instances.
/// </summary>
internal static class AIAgentBuilderExtensions
{
    /// <summary>
    /// Adds structured output capabilities to the agent pipeline, enabling conversion of text responses to structured JSON format.
    /// </summary>
    /// <param name="builder">The <see cref="AIAgentBuilder"/> to which structured output support will be added.</param>
    /// <param name="chatClient">
    /// The chat client used to transform text responses into structured JSON format.
    /// If <see langword="null"/>, the chat client will be resolved from the service provider.
    /// </param>
    /// <param name="optionsFactory">
    /// An optional factory function that returns the <see cref="StructuredOutputAgentOptions"/> instance to use.
    /// This allows for fine-tuning the structured output behavior such as setting the response format or system message.
    /// The factory is invoked each time the agent factory registered by this method runs.
    /// </param>
    /// <returns>The <see cref="AIAgentBuilder"/> with structured output capabilities added, enabling method chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="builder"/> is <see langword="null"/>.</exception>
    /// <exception cref="InvalidOperationException">
    /// Thrown from the registered agent factory when <paramref name="chatClient"/> is <see langword="null"/>
    /// and no <see cref="IChatClient"/> can be resolved from the service provider.
    /// </exception>
    /// <remarks>
    /// <para>
    /// A <see cref="ChatResponseFormatJson"/> must be specified either through the
    /// <see cref="AgentRunOptions.ResponseFormat"/> at runtime or the <see cref="StructuredOutputAgentOptions.ChatOptions"/>
    /// provided during configuration.
    /// </para>
    /// </remarks>
    public static AIAgentBuilder UseStructuredOutput(
        this AIAgentBuilder builder,
        IChatClient? chatClient = null,
        Func<StructuredOutputAgentOptions>? optionsFactory = null)
    {
        ArgumentNullException.ThrowIfNull(builder);

        return builder.Use((innerAgent, services) =>
        {
            // Resolve into a local instead of assigning back to the captured 'chatClient' parameter.
            // Writing to the captured parameter would cache the first resolved client inside the closure,
            // so a later invocation of this factory with a different service provider would silently
            // reuse the client resolved from the earlier provider.
            IChatClient resolvedChatClient = chatClient
                ?? services?.GetService<IChatClient>()
                ?? throw new InvalidOperationException($"No {nameof(IChatClient)} was provided and none could be resolved from the service provider. Either provide an {nameof(IChatClient)} explicitly or register one in the dependency injection container.");

            return new StructuredOutputAgent(innerAgent, resolvedChatClient, optionsFactory?.Invoke());
        });
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
using Azure.AI.OpenAI;
using Azure.Identity;
using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
using OpenAI.Chat;
using SampleApp;
using ChatMessage = Microsoft.Extensions.AI.ChatMessage;

var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException("AZURE_OPENAI_ENDPOINT is not set.");
var deploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT_NAME") ?? "gpt-4o-mini";
string endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException("AZURE_OPENAI_ENDPOINT is not set.");
string deploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT_NAME") ?? "gpt-4o-mini";

// Create chat client to be used by chat client agents.
// WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
Expand All @@ -23,52 +25,159 @@
new DefaultAzureCredential())
.GetChatClient(deploymentName);

// Create the ChatClientAgent with the specified name and instructions.
ChatClientAgent agent = chatClient.AsAIAgent(name: "HelpfulAssistant", instructions: "You are a helpful assistant.");
// Demonstrates how to work with structured output via ResponseFormat with the non-generic RunAsync method.
// This approach is useful when:
// a. Structured output is used for inter-agent communication, where one agent produces structured output
// and passes it as text to another agent as input, without the need for the caller to directly work with the structured output.
// b. The type of the structured output is not known at compile time, so the generic RunAsync<T> method cannot be used.
// c. The type of the structured output is represented by JSON schema only, without a corresponding class or type in the code.
await UseStructuredOutputWithResponseFormatAsync(chatClient);

// Set PersonInfo as the type parameter of RunAsync method to specify the expected structured output from the agent and invoke the agent with some unstructured input.
AgentResponse<PersonInfo> response = await agent.RunAsync<PersonInfo>("Please provide information about John Smith, who is a 35-year-old software engineer.");
// Demonstrates how to work with structured output via the generic RunAsync<T> method.
// This approach is useful when the caller needs to directly work with the structured output in the code
// via an instance of the corresponding class or type and the type is known at compile time.
await UseStructuredOutputWithRunAsync(chatClient);

// Access the structured output via the Result property of the agent response.
Console.WriteLine("Assistant Output:");
Console.WriteLine($"Name: {response.Result.Name}");
Console.WriteLine($"Age: {response.Result.Age}");
Console.WriteLine($"Occupation: {response.Result.Occupation}");
// Demonstrates how to work with structured output when streaming using the RunStreamingAsync method.
await UseStructuredOutputWithRunStreamingAsync(chatClient);

// Create the ChatClientAgent with the specified name, instructions, and expected structured output the agent should produce.
ChatClientAgent agentWithPersonInfo = chatClient.AsAIAgent(new ChatClientAgentOptions()
// Demonstrates how to add structured output support to agents that don't natively support it using the structured output middleware.
// This approach is useful when working with agents that don't support structured output natively, or agents using models
// that don't have the capability to produce structured output, allowing you to still leverage structured output features by transforming
// the text output from the agent into structured data using a chat client.
await UseStructuredOutputWithMiddlewareAsync(chatClient);

// Sample: requests structured output by setting ResponseFormat on the agent's ChatOptions and using the
// non-generic RunAsync, then prints the raw JSON text and the value deserialized from it.
static async Task UseStructuredOutputWithResponseFormatAsync(ChatClient chatClient)
{
    Console.WriteLine("=== Structured Output with ResponseFormat ===");

    // Create the agent
    AIAgent agent = chatClient.AsAIAgent(new ChatClientAgentOptions()
    {
        Name = "HelpfulAssistant",
        ChatOptions = new()
        {
            Instructions = "You are a helpful assistant.",
            // Specify CityInfo as the type parameter of ForJsonSchema to indicate the expected structured output from the agent.
            ResponseFormat = Microsoft.Extensions.AI.ChatResponseFormat.ForJsonSchema<CityInfo>()
        }
    });

    // Invoke the agent with some unstructured input to extract the structured information from.
    AgentResponse response = await agent.RunAsync("Provide information about the capital of France.");

    // Access the structured output via the Text property of the agent response as JSON in scenarios when JSON as text is required
    // and no object instance is needed (e.g., for logging, forwarding to another service, or storing in a database).
    Console.WriteLine("Assistant Output (JSON):");
    Console.WriteLine(response.Text);
    Console.WriteLine();

    // Deserialize the JSON text to work with the structured object in scenarios when you need to access properties,
    // perform operations, or pass the data to methods that require the typed object instance.
    // Deserialize returns null when the payload is the JSON literal null; fail with a descriptive error here
    // instead of suppressing the warning and hitting a NullReferenceException on the first property access.
    CityInfo cityInfo = JsonSerializer.Deserialize<CityInfo>(response.Text)
        ?? throw new InvalidOperationException("The agent response text deserialized to null instead of a CityInfo instance.");

    Console.WriteLine("Assistant Output (Deserialized):");
    Console.WriteLine($"Name: {cityInfo.Name}");
    Console.WriteLine();
}

// Sample: obtains a typed result directly by calling the generic RunAsync<T> with CityInfo as the type argument.
static async Task UseStructuredOutputWithRunAsync(ChatClient chatClient)
{
    Console.WriteLine("=== Structured Output with RunAsync<T> ===");

    // Build a plain assistant; the expected output type is supplied per call rather than at construction.
    AIAgent assistant = chatClient.AsAIAgent(name: "HelpfulAssistant", instructions: "You are a helpful assistant.");

    // The CityInfo type argument tells RunAsync which structured output is expected for this unstructured prompt.
    AgentResponse<CityInfo> typedResponse = await assistant.RunAsync<CityInfo>("Provide information about the capital of France.");

    // The typed value is exposed through the Result property of the response.
    CityInfo capital = typedResponse.Result;

    Console.WriteLine("Assistant Output:");
    Console.WriteLine($"Name: {capital.Name}");
    Console.WriteLine();
}

// Sample: requests structured output while streaming. The streamed updates are assembled into a single
// response first, and the JSON in its Text property is then deserialized into CityInfo.
static async Task UseStructuredOutputWithRunStreamingAsync(ChatClient chatClient)
{
    Console.WriteLine("=== Structured Output with RunStreamingAsync ===");

    // Create the agent
    AIAgent agent = chatClient.AsAIAgent(new ChatClientAgentOptions()
    {
        Name = "HelpfulAssistant",
        ChatOptions = new()
        {
            Instructions = "You are a helpful assistant.",
            // Specify CityInfo as the type parameter of ForJsonSchema to indicate the expected structured output from the agent.
            ResponseFormat = Microsoft.Extensions.AI.ChatResponseFormat.ForJsonSchema<CityInfo>()
        }
    });

    // Invoke the agent with some unstructured input while streaming, to extract the structured information from.
    IAsyncEnumerable<AgentResponseUpdate> updates = agent.RunStreamingAsync("Provide information about the capital of France.");

    // Assemble all the parts of the streamed output.
    AgentResponse nonGenericResponse = await updates.ToAgentResponseAsync();

    // Access the structured output by deserializing JSON in the Text property.
    // Deserialize returns null when the payload is the JSON literal null; fail with a descriptive error here
    // instead of suppressing the warning and hitting a NullReferenceException on the first property access.
    CityInfo cityInfo = JsonSerializer.Deserialize<CityInfo>(nonGenericResponse.Text)
        ?? throw new InvalidOperationException("The streamed agent response text deserialized to null instead of a CityInfo instance.");

    Console.WriteLine("Assistant Output:");
    Console.WriteLine($"Name: {cityInfo.Name}");
    Console.WriteLine();
}

static async Task UseStructuredOutputWithMiddlewareAsync(ChatClient chatClient)
{
Name = "HelpfulAssistant",
ChatOptions = new() { Instructions = "You are a helpful assistant.", ResponseFormat = Microsoft.Extensions.AI.ChatResponseFormat.ForJsonSchema<PersonInfo>() }
});
Console.WriteLine("=== Structured Output with UseStructuredOutput Middleware ===");

// Create chat client that will transform the agent text response into structured output.
IChatClient meaiChatClient = chatClient.AsIChatClient();

// Invoke the agent with some unstructured input while streaming, to extract the structured information from.
var updates = agentWithPersonInfo.RunStreamingAsync("Please provide information about John Smith, who is a 35-year-old software engineer.");
// Create the agent
AIAgent agent = meaiChatClient.AsAIAgent(name: "HelpfulAssistant", instructions: "You are a helpful assistant.");

// Assemble all the parts of the streamed output, since we can only deserialize once we have the full json,
// then deserialize the response into the PersonInfo class.
PersonInfo personInfo = (await updates.ToAgentResponseAsync()).Deserialize<PersonInfo>(JsonSerializerOptions.Web);
// Add structured output middleware via UseStructuredOutput method to add structured output support to the agent.
// This middleware transforms the agent's text response into structured data using a chat client.
// Because the agent in this sample already supports structured output natively, we also add a middleware that removes
// ResponseFormat from the AgentRunOptions to emulate an agent that doesn't support structured output natively.
agent = agent
.AsBuilder()
.UseStructuredOutput(meaiChatClient)
.Use(ResponseFormatRemovalMiddleware, null)
.Build();

Console.WriteLine("Assistant Output:");
Console.WriteLine($"Name: {personInfo.Name}");
Console.WriteLine($"Age: {personInfo.Age}");
Console.WriteLine($"Occupation: {personInfo.Occupation}");
// Set CityInfo as the type parameter of RunAsync method to specify the expected structured output from the agent and invoke it with some unstructured input.
AgentResponse<CityInfo> response = await agent.RunAsync<CityInfo>("Provide information about the capital of France.");

// Access the structured output via the Result property of the agent response.
CityInfo cityInfo = response.Result;

Console.WriteLine("Assistant Output:");
Console.WriteLine($"Name: {cityInfo.Name}");
Console.WriteLine();
}

// Run middleware that forwards the call to the inner agent with ResponseFormat cleared on a copy of the
// supplied options, emulating an agent that doesn't support structured output natively.
static Task<AgentResponse> ResponseFormatRemovalMiddleware(IEnumerable<ChatMessage> messages, AgentSession? session, AgentRunOptions? options, AIAgent innerAgent, CancellationToken cancellationToken)
{
    // Work on a clone so the ResponseFormat on the options instance passed in is not modified here.
    AgentRunOptions? strippedOptions = options?.Clone();
    if (strippedOptions is not null)
    {
        strippedOptions.ResponseFormat = null;
    }

    return innerAgent.RunAsync(messages, session, strippedOptions, cancellationToken);
}

namespace SampleApp
{
/// <summary>
/// Represents information about a person, including their name, age, and occupation, matched to the JSON schema used in the agent.
/// Represents information about a city, including its name.
/// </summary>
[Description("Information about a person including their name, age, and occupation")]
public class PersonInfo
[Description("Information about a city")]
public sealed class CityInfo
{
[JsonPropertyName("name")]
public string? Name { get; set; }

[JsonPropertyName("age")]
public int? Age { get; set; }

[JsonPropertyName("occupation")]
public string? Occupation { get; set; }
}
}
Loading
Loading