diff --git a/docs/decisions/0007-python-subpackages.md b/docs/decisions/0008-python-subpackages.md
similarity index 100%
rename from docs/decisions/0007-python-subpackages.md
rename to docs/decisions/0008-python-subpackages.md
diff --git a/docs/decisions/0009-support-long-running-operations.md b/docs/decisions/0009-support-long-running-operations.md
new file mode 100644
index 0000000000..7227840c8f
--- /dev/null
+++ b/docs/decisions/0009-support-long-running-operations.md
@@ -0,0 +1,1689 @@
+---
+status: accepted
+contact: sergeymenshykh
+date: 2025-10-15
+deciders: markwallace, rbarreto, westey-m, stephentoub
+informed: {}
+---
+
+## Long-Running Operations Design
+
+## Context and Problem Statement
+
+The Agent Framework currently supports synchronous request-response patterns for AI agent interactions,
+where agents process requests and return results immediately. Similarly, MEAI chat clients follow the same
+synchronous pattern for AI interactions. However, many real-world AI scenarios involve complex tasks that
+require significant processing time, such as:
+- Code generation and analysis tasks
+- Complex reasoning and research operations
+- Image and content generation
+- Large document processing and summarization
+
+The current Agent Framework architecture needs native support for long-running operations, as it is
+essential for handling these scenarios effectively. Additionally, as MEAI chat clients need to start supporting
+long-running operations as well to be used together with AF agents, the design should consider integration
+patterns and consistency with the broader Microsoft.Extensions.AI ecosystem to provide a unified experience
+across both agent and chat client scenarios.
+
+## Decision Drivers
+- Chat clients and agents should support long-running execution as well as quick prompts.
+- The design should be simple and intuitive for developers to use.
+- The design should be extensible to allow new long-running execution features to be added in the future.
+- The design should be additive rather than disruptive to allow existing chat clients to iteratively add
+support for long-running operations without breaking existing functionality.
+
+## Comparison of Long-Running Operation Features
+| Feature | OpenAI Responses | Foundry Agents | A2A |
+|-----------------------------|---------------------------|-------------------------------------|----------------------|
+| Initiated by | User (Background = true) | Long-running execution is always on | Agent |
+| Modeled as | Response | Run | Task |
+| Supported modes<sup>1</sup> | Sync, Async               | Async                               | Sync, Async          |
+| Getting status support | ✅ | ✅ | ✅ |
+| Getting result support | ✅ | ✅ | ✅ |
+| Update support | ❌ | ❌ | ✅ |
+| Cancellation support | ✅ | ✅ | ✅ |
+| Delete support | ✅ | ❌ | ❌ |
+| Non-streaming support | ✅ | ✅ | ✅ |
+| Streaming support | ✅ | ✅ | ✅ |
+| Execution statuses          | InProgress, Completed, Queued,<br>Cancelled, Failed, Incomplete | InProgress, Completed, Queued,<br>Cancelled, Failed, Cancelling,<br>RequiresAction, Expired | Working, Completed, Canceled,<br>Failed, Rejected, AuthRequired,<br>InputRequired, Submitted, Unknown |
+
+<sup>1</sup> Sync is a regular message-based request/response communication pattern; Async is a pattern for long-running operations/tasks where the agent returns an ID for a run/task and allows polling for status and final results by the ID.
+
+**Note:** The names for new classes, interfaces, and their members used in the sections below are tentative and will be discussed in a dedicated section of this document.
+
+## Long-Running Operations Support for Chat Clients
+
+This section describes different options for various aspects required to add long-running operations support to chat clients.
+
+### 1. Methods for Working with Long-Running Operations
+
+Based on the analysis of existing APIs that support long-running operations (such as OpenAI Responses, Azure AI Foundry Agents, and A2A),
+the following operations are used for working with long-running operations:
+- Common operations:
+ - **Start Long-Running Execution**: Initiates a long-running operation and returns its Id.
+ - **Get Status of Long-Running Execution**: This method retrieves the status of a long-running operation.
+ - **Get Result of Long-Running Execution**: Retrieves the result of a long-running operation.
+- Uncommon operations:
+ - **Update Long-Running Execution**: This method updates a long-running operation, such as adding new messages or modifying existing ones.
+ - **Cancel Long-Running Execution**: This method cancels a long-running operation.
+ - **Delete Long-Running Execution**: This method deletes a long-running operation.
+
+To support these operations by `IChatClient` implementations, the following options are available:
+- **1.1 New IAsyncChatClient Interface for All Long-Running Execution Operations**
+- **1.2 Get{Streaming}ResponseAsync for Common Operations & New IAsyncChatClient Interface for Uncommon Operations**
+- **1.3 Get{Streaming}ResponseAsync for Common Operations & New IAsyncChatClient Interface for Uncommon Operations & Capability Check**
+- **1.4 Get{Streaming}ResponseAsync for Common Operations & Individual Interface per Uncommon Operation**
+
+#### 1.1 New IAsyncChatClient Interface for All Long-Running Execution Operations
+
+This option suggests adding a new interface `IAsyncChatClient` that some implementations of `IChatClient` may implement to support long-running operations.
+```csharp
+public interface IAsyncChatClient
+{
+    Task<AsyncRunResult> StartAsyncRunAsync(IList<ChatMessage> chatMessages, RunOptions? options = null, CancellationToken ct = default);
+    Task<AsyncRunResult> GetAsyncRunStatusAsync(string runId, CancellationToken ct = default);
+    Task<AsyncRunResult> GetAsyncRunResultAsync(string runId, CancellationToken ct = default);
+    Task UpdateAsyncRunAsync(string runId, IList<ChatMessage> chatMessages, CancellationToken ct = default);
+    Task CancelAsyncRunAsync(string runId, CancellationToken ct = default);
+    Task DeleteAsyncRunAsync(string runId, CancellationToken ct = default);
+}
+
+public class CustomChatClient : IChatClient, IAsyncChatClient
+{
+ ...
+}
+```
+
+Consumer code example:
+```csharp
+IChatClient chatClient = new CustomChatClient();
+
+string prompt = "..."
+
+// Determine if the prompt should be run as a long-running execution
+if(chatClient.GetService<IAsyncChatClient>() is { } asyncChatClient && ShouldRunPromptAsynchronously(prompt))
+{
+    AsyncRunResult result;
+
+    try
+    {
+        // Start a long-running execution
+        result = await asyncChatClient.StartAsyncRunAsync(prompt);
+    }
+    catch (NotSupportedException)
+    {
+        Console.WriteLine("This chat client does not support long-running operations.");
+        throw;
+    }
+
+    AsyncRunContent? asyncRunContent = GetAsyncRunContent(result);
+
+ // Poll for the status of the long-running execution
+ while (asyncRunContent.Status is AsyncRunStatus.InProgress or AsyncRunStatus.Queued)
+ {
+ result = await asyncChatClient.GetAsyncRunStatusAsync(asyncRunContent.RunId);
+ asyncRunContent = GetAsyncRunContent(result);
+ }
+
+    // Get the result of the long-running execution
+    result = await asyncChatClient.GetAsyncRunResultAsync(asyncRunContent.RunId);
+ Console.WriteLine(result);
+}
+else
+{
+ // Complete a quick prompt
+ ChatResponse response = await chatClient.GetResponseAsync(prompt);
+ Console.WriteLine(response);
+}
+```
+
+**Pros:**
+- Not a breaking change: Existing chat clients are not affected.
+- Callers can determine if a chat client supports long-running operations by calling its `GetService()` method.
+
+**Cons:**
+- Not extensible: Adding new methods to the `IAsyncChatClient` interface after its release will break existing implementations of the interface.
+- Missing capability check: Callers cannot determine if chat clients support specific uncommon operations before attempting to use them.
+- Insufficient information: Callers may not have enough information to decide whether a prompt should run as a long-running operation.
+- The new method calls bypass existing decorators such as logging, telemetry, etc.
+- An alternative solution for decorating the new methods will have to be put in place because the new method calls bypass existing decorators
+such as logging, telemetry, etc.
+
+#### 1.2 Get{Streaming}ResponseAsync for Common Operations & New IAsyncChatClient Interface for Uncommon Operations
+
+This option suggests using the existing `GetResponseAsync` and `GetStreamingResponseAsync` methods of the `IChatClient` interface to support
+common long-running operations, such as starting long-running operations, getting their status, their results, and potentially
+updating them, in addition to their existing functionality of serving quick prompts. Methods for the uncommon operations, such as updating,
+cancelling, and deleting long-running operations, will be added to a new `IAsyncChatClient` interface that will be implemented by chat clients
+that support them.
+
+This option presumes that Option 3.2 (Have one method for getting long-running execution status and result) is selected.
+
+```csharp
+public interface IAsyncChatClient
+{
+ /// The update can be handled by GetResponseAsync method as well.
+    Task UpdateAsyncRunAsync(string runId, IList<ChatMessage> chatMessages, CancellationToken ct = default);
+
+ Task CancelAsyncRunAsync(string runId, CancellationToken ct = default);
+ Task DeleteAsyncRunAsync(string runId, CancellationToken ct = default);
+}
+
+public class ResponsesChatClient : IChatClient, IAsyncChatClient
+{
+    public async Task<ChatResponse> GetResponseAsync(string prompt, ChatOptions? options = null, CancellationToken ct = default)
+ {
+ ClientResult? result = null;
+
+ // If long-running execution mode is enabled, we run the prompt as a long-running execution
+ if(enableLongRunningResponses)
+ {
+ // No RunId is provided, so we start a long-running execution
+ if(options?.RunId is null)
+ {
+ result = await this._openAIResponseClient.CreateResponseAsync(prompt, new ResponseCreationOptions
+ {
+ Background = true,
+ });
+ }
+ else // RunId is provided, so we get the status of a long-running execution
+ {
+ result = await this._openAIResponseClient.GetResponseAsync(options.RunId);
+ }
+ }
+ else
+ {
+ // Handle the case when the prompt should be run as a quick prompt
+ result = await this._openAIResponseClient.CreateResponseAsync(prompt, new ResponseCreationOptions
+ {
+ Background = false
+ });
+ }
+
+ ...
+ }
+
+    public Task UpdateAsyncRunAsync(string runId, IList<ChatMessage> chatMessages, CancellationToken ct = default)
+ {
+ throw new NotSupportedException("This chat client does not support updating long-running operations.");
+ }
+
+ public Task CancelAsyncRunAsync(string runId, CancellationToken cancellationToken = default)
+ {
+ return this._openAIResponseClient.CancelResponseAsync(runId, cancellationToken);
+ }
+
+ public Task DeleteAsyncRunAsync(string runId, CancellationToken cancellationToken = default)
+ {
+ return this._openAIResponseClient.DeleteResponseAsync(runId, cancellationToken);
+ }
+}
+```
+
+Consumer code example:
+```csharp
+IChatClient chatClient = new ResponsesChatClient();
+
+ChatResponse response = await chatClient.GetResponseAsync("");
+
+if (GetAsyncRunContent(response) is AsyncRunContent asyncRunContent)
+{
+ // Get result of the long-running execution
+ response = await chatClient.GetResponseAsync([], new ChatOptions
+ {
+ RunId = asyncRunContent.RunId
+ });
+
+ // After some time
+
+ // If it's still running, cancel and delete the run
+ if (GetAsyncRunContent(response).Status is AsyncRunStatus.InProgress or AsyncRunStatus.Queued)
+ {
+        IAsyncChatClient? asyncChatClient = chatClient.GetService<IAsyncChatClient>();
+
+ try
+ {
+ await asyncChatClient?.CancelAsyncRunAsync(asyncRunContent.RunId);
+ }
+ catch (NotSupportedException)
+ {
+ Console.WriteLine("This chat client does not support cancelling long-running operations.");
+ }
+
+ try
+ {
+ await asyncChatClient?.DeleteAsyncRunAsync(asyncRunContent.RunId);
+ }
+ catch (NotSupportedException)
+ {
+ Console.WriteLine("This chat client does not support deleting long-running operations.");
+ }
+ }
+}
+else
+{
+ // Handle the case when the response is a quick prompt completion
+ Console.WriteLine(response);
+}
+```
+
+This option addresses the issue that the option above has with callers needing to know whether the prompt should
+be run as a long-running operation or a quick prompt. It allows callers to simply call the existing `GetResponseAsync` method,
+and the chat client will decide whether to run the prompt as a long-running operation or a quick prompt. If control over
+the execution mode is still needed, and the underlying API supports it, it will be possible for callers to set the mode at
+the chat client invocation or configuration. More details about this are provided in one of the sections below about enabling long-running operation mode.
+
+Additionally, it addresses another issue where the `GetResponseAsync` method may return a long-running
+execution response and the `StartAsyncRunAsync` method may return a quick prompt response. Having one method that handles both cases
+allows callers to not worry about this behavior and simply check the type of the response to determine if it is a long-running operation
+or a quick prompt completion.
+
+With the `GetResponseAsync` method becoming responsible for starting, getting status, getting results and updating long-running operations,
+there are only a few operations left in the `IAsyncChatClient` interface - cancel and delete. As a result, the `IAsyncChatClient` interface
+name may not be the best fit, as it suggests that it is responsible for all long-running operations while it is not. Should
+the interface be renamed to reflect the operations it supports? What should the new name be? Option 1.4 considers an alternative
+that might solve the naming issue.
+
+**Pros:**
+- Delegation and control: Callers delegate the decision of whether to run a prompt as a long-running operation or quick prompt to chat clients,
+while still having the option to control the execution mode to determine how to handle prompts if needed.
+- Not a breaking change: Existing chat clients are not affected.
+
+**Cons:**
+- Not extensible: Adding new methods to the `IAsyncChatClient` interface after its release will break existing implementations of the interface.
+- Missing capability check: Callers cannot determine if chat clients support specific uncommon operations before attempting to use them.
+- An alternative solution for decorating the new methods will have to be put in place because the new method calls bypass existing decorators
+such as logging, telemetry, etc.
+
+#### 1.3 Get{Streaming}ResponseAsync for Common Operations & New IAsyncChatClient Interface for Uncommon Operations & Capability Check
+
+This option extends the previous option with a way for callers to determine if a chat client supports uncommon operations before attempting to use them.
+
+```csharp
+public interface IAsyncChatClient
+{
+ bool CanUpdateAsyncRun { get; }
+ bool CanCancelAsyncRun { get; }
+ bool CanDeleteAsyncRun { get; }
+
+    Task UpdateAsyncRunAsync(string runId, IList<ChatMessage> chatMessages, CancellationToken ct = default);
+ Task CancelAsyncRunAsync(string runId, CancellationToken ct = default);
+ Task DeleteAsyncRunAsync(string runId, CancellationToken ct = default);
+}
+
+public class ResponsesChatClient : IChatClient, IAsyncChatClient
+{
+    public async Task<ChatResponse> GetResponseAsync(string prompt, ChatOptions? options = null, CancellationToken ct = default)
+ {
+ ...
+ }
+
+ public bool CanUpdateAsyncRun => false; // This chat client does not support updating long-running operations.
+ public bool CanCancelAsyncRun => true; // This chat client supports cancelling long-running operations.
+ public bool CanDeleteAsyncRun => true; // This chat client supports deleting long-running operations.
+
+    public Task UpdateAsyncRunAsync(string runId, IList<ChatMessage> chatMessages, CancellationToken ct = default)
+ {
+ throw new NotSupportedException("This chat client does not support updating long-running operations.");
+ }
+
+ public Task CancelAsyncRunAsync(string runId, CancellationToken cancellationToken = default)
+ {
+ return this._openAIResponseClient.CancelResponseAsync(runId, cancellationToken);
+ }
+
+ public Task DeleteAsyncRunAsync(string runId, CancellationToken cancellationToken = default)
+ {
+ return this._openAIResponseClient.DeleteResponseAsync(runId, cancellationToken);
+ }
+}
+```
+
+Consumer code example:
+```csharp
+IChatClient chatClient = new ResponsesChatClient();
+
+ChatResponse response = await chatClient.GetResponseAsync("");
+
+if (GetAsyncRunContent(response) is AsyncRunContent asyncRunContent)
+{
+ // Get result of the long-running execution
+ response = await chatClient.GetResponseAsync([], new ChatOptions
+ {
+ RunId = asyncRunContent.RunId
+ });
+
+ // After some time
+
+    IAsyncChatClient? asyncChatClient = chatClient.GetService<IAsyncChatClient>();
+
+ // If it's still running, cancel and delete the run
+ if (GetAsyncRunContent(response).Status is AsyncRunStatus.InProgress or AsyncRunStatus.Queued)
+ {
+ if(asyncChatClient?.CanCancelAsyncRun ?? false)
+ {
+            await asyncChatClient.CancelAsyncRunAsync(asyncRunContent.RunId);
+ }
+
+ if(asyncChatClient?.CanDeleteAsyncRun ?? false)
+ {
+            await asyncChatClient.DeleteAsyncRunAsync(asyncRunContent.RunId);
+ }
+ }
+}
+else
+{
+ // Handle the case when the response is a quick prompt completion
+ Console.WriteLine(response);
+}
+```
+
+**Pros:**
+- Delegation and control: Callers delegate the decision of whether to run a prompt as a long-running execution or quick prompt to chat clients,
+while still having the option to control the execution mode to determine how to handle prompts if needed.
+- Not a breaking change: Existing chat clients are not affected.
+- Capability check: Callers can determine if the chat client supports an uncommon operation before attempting to use it.
+
+**Cons:**
+- Not extensible: Adding new members to the `IAsyncChatClient` interface after its release will break existing implementations of the interface.
+- An alternative solution for decorating the new methods will have to be put in place because the new method calls bypass existing decorators
+such as logging, telemetry, etc.
+
+#### 1.4 Get{Streaming}ResponseAsync for Common Operations & Individual Interface per Uncommon Operation
+
+This option suggests using the existing `Get{Streaming}ResponseAsync` methods of the `IChatClient` interface to support
+common long-running operations, such as starting long-running operations, getting their status, and their results, and potentially
+updating them, in addition to their existing functionality of serving quick prompts.
+
+The uncommon operations that are not supported by all analyzed APIs, such as updating (which can be handled by `Get{Streaming}ResponseAsync`), cancelling,
+and deleting long-running operations, as well as future ones, will be added to their own interfaces that will be implemented by chat clients
+that support them.
+
+This option presumes that Option 3.2 (Have one method for getting long-running execution status and result) is selected.
+
+The interfaces can inherit from `IChatClient` to allow callers to use an instance of `ICancelableChatClient`, `IUpdatableChatClient`, or `IDeletableChatClient`
+for calling the `Get{Streaming}ResponseAsync` methods as well. However, those methods belong to a leaf chat client that, if obtained via the `GetService()`
+method, won't be decorated by existing decorators such as function invocation, logging, etc. As a result, an alternative solution (wrap the instance of the leaf
+chat client in a decorator at the `GetService` method call) will need to be applied not only to the new methods of one of the interfaces but also to the existing
+`Get{Streaming}ResponseAsync` ones.
+
+```csharp
+public interface ICancelableChatClient
+{
+ Task CancelAsyncRunAsync(string runId, CancellationToken cancellationToken = default);
+}
+
+public interface IUpdatableChatClient
+{
+    Task UpdateAsyncRunAsync(string runId, IList<ChatMessage> chatMessages, CancellationToken cancellationToken = default);
+}
+
+public interface IDeletableChatClient
+{
+ Task DeleteAsyncRunAsync(string runId, CancellationToken cancellationToken = default);
+}
+
+// Responses chat client that supports standard long-running operations + cancellation and deletion
+public class ResponsesChatClient : IChatClient, ICancelableChatClient, IDeletableChatClient
+{
+    public async Task<ChatResponse> GetResponseAsync(string prompt, ChatOptions? options = null, CancellationToken ct = default)
+ {
+ ...
+ }
+
+ public Task CancelAsyncRunAsync(string runId, CancellationToken cancellationToken = default)
+ {
+ return this._openAIResponseClient.CancelResponseAsync(runId, cancellationToken);
+ }
+
+ public Task DeleteAsyncRunAsync(string runId, CancellationToken cancellationToken = default)
+ {
+ return this._openAIResponseClient.DeleteResponseAsync(runId, cancellationToken);
+ }
+}
+```
+
+Example that starts a long-running operation, gets its status, and cancels and deletes it if it's not completed after some time:
+```csharp
+IChatClient chatClient = new ResponsesChatClient();
+
+ChatResponse response = await chatClient.GetResponseAsync("", new ChatOptions { AllowLongRunningResponses = true });
+
+if (GetAsyncRunContent(response) is AsyncRunContent asyncRunContent)
+{
+ // Get result
+ response = await chatClient.GetResponseAsync([], new ChatOptions
+ {
+ RunId = asyncRunContent.RunId
+ });
+
+ // After some time
+
+ // If it's still running, cancel and delete the run
+ if (GetAsyncRunContent(response).Status is AsyncRunStatus.InProgress or AsyncRunStatus.Queued)
+ {
+        if(chatClient.GetService<ICancelableChatClient>() is {} cancelableChatClient)
+ {
+ await cancelableChatClient.CancelAsyncRunAsync(asyncRunContent.RunId);
+ }
+
+        if(chatClient.GetService<IDeletableChatClient>() is {} deletableChatClient)
+ {
+ await deletableChatClient.DeleteAsyncRunAsync(asyncRunContent.RunId);
+ }
+ }
+}
+```
+
+**Pros:**
+- Extensible: New interfaces can be added and implemented to support new long-running operations without breaking
+existing chat client implementations.
+- Not a breaking change: Existing chat clients that implement the `IChatClient` interface are not affected.
+- Delegation and control: Callers delegate the decision of whether to run a prompt as a long-running operation or quick prompt
+to chat clients, while still having the option to control the execution mode to determine how to handle prompts if needed.
+
+**Cons:**
+- Breaking changes: Changing the signatures of the methods of the operation-specific interfaces or adding new members to them will
+break existing implementations of those interfaces. However, the blast radius of this change is much smaller and limited to a subset
+of chat clients that implement the operation-specific interfaces. However, this is still a breaking change.
+
+### 2. Enabling Long-Running Operations
+
+Based on the API analysis, some APIs must be explicitly configured to run in long-running operation mode,
+while others don't need additional configuration because they either decide themselves whether a request
+should run as a long-running operation, or they always operate in long-running operation mode or quick prompt mode:
+| Feature | OpenAI Responses | Foundry Agents | A2A |
+|-----------------------------|---------------------------|-------------------------------------|----------------------|
+| Long-running execution | User (Background = true) | Long-running execution is always on | Agent |
+
+The options below consider how to enable long-running operation mode for chat clients that support both quick prompts and long-running operations.
+
+#### 2.1 Execution Mode per `Get{Streaming}ResponseAsync` Invocation
+
+This option proposes adding a new nullable `AllowLongRunningResponses` property to the `ChatOptions` class.
+The property value will be `true` if the caller requests a long-running operation, `false`, `null` or omitted otherwise.
+
+Chat clients that work with APIs requiring explicit configuration per operation will use this property to determine whether to run the prompt as a long-running
+operation or quick prompt. Chat clients that work with APIs that don't require explicit configuration will ignore this property and operate according
+to their own logic/configuration.
+
+```csharp
+public class ChatOptions
+{
+ // Existing properties...
+ public bool? AllowLongRunningResponses { get; set; }
+}
+
+// Consumer code example
+IChatClient chatClient = ...; // Get an instance of IChatClient
+
+// Start a long-running execution for the prompt if supported by the underlying API
+ChatResponse response = await chatClient.GetResponseAsync("", new ChatOptions { AllowLongRunningResponses = true });
+
+// Start a quick prompt
+ChatResponse quickResponse = await chatClient.GetResponseAsync("", new ChatOptions { AllowLongRunningResponses = false });
+```
+
+**Pros:**
+- Callers can switch between quick prompts and long-running operation per invocation of the `Get{Streaming}ResponseAsync` methods without
+changing the client configuration.
+- Enables explicit control over the execution mode by callers per invocation, meaning that no caller site is broken if the agent is injected via DI,
+and the caller can turn on the long-running operation mode when it can handle it.
+
+**Con:** This may not be valuable for all callers, as they may not have enough information to decide whether the prompt should run as a long-running operation or quick prompt.
+
+#### 2.2 Execution Mode per `Get{Streaming}ResponseAsync` Invocation + Model Class
+
+This option is similar to the previous one, but suggests using a model class `LongRunningResponsesOptions` for properties related to long-running operations.
+
+```csharp
+public class LongRunningResponsesOptions
+{
+ public bool? Allow { get; set; }
+    //public PollingSettings? PollingSettings { get; set; } // Can be added later if necessary
+}
+
+public class ChatOptions
+{
+ public LongRunningResponsesOptions? LongRunningResponsesOptions { get; set; }
+}
+
+// Consumer code example
+IChatClient chatClient = ...; // Get an instance of IChatClient
+
+// Start a long-running execution for the prompt if supported by the underlying API
+ChatResponse response = await chatClient.GetResponseAsync("", new ChatOptions { LongRunningResponsesOptions = new() { Allow = true } });
+```
+
+**Pros:**
+- Enables explicit control over the execution mode by callers per invocation, meaning that no caller site is broken if the agent is injected via DI,
+and the caller can turn on the long-running operation mode when it can handle it.
+- No proliferation of long-running operation-related properties in the `ChatOptions` class.
+
+**Con:** Slightly more complex initialization.
+
+#### 2.3 Execution Mode per Chat Client Instance
+
+This option proposes adding a new `enableLongRunningResponses` parameter to constructors of chat clients that support both quick prompts and long-running operations.
+The parameter value will be `true` if the chat client should operate in long-running operation mode, `false` if it should operate in quick prompt mode.
+
+Chat clients that work with APIs requiring explicit configuration will use this parameter to determine whether to run prompts as long-running operations or quick prompts.
+Chat clients that work with APIs that don't require explicit configuration won't have this parameter in their constructors and will operate according to their own
+logic/configuration.
+
+```csharp
+public class CustomChatClient : IChatClient
+{
+ private readonly bool _enableLongRunningResponses;
+
+ public CustomChatClient(bool enableLongRunningResponses)
+ {
+ this._enableLongRunningResponses = enableLongRunningResponses;
+ }
+
+ // Existing methods...
+}
+
+// Consumer code example
+IChatClient chatClient = new CustomChatClient(enableLongRunningResponses: true);
+
+// Start a long-running execution for the prompt
+ChatResponse response = await chatClient.GetResponseAsync("");
+```
+
+Chat clients can be configured to always operate in long-running operation mode or quick prompt mode based on their role in a specific scenario.
+For example, a chat client responsible for generating ideas for images can be configured for quick prompt mode, while a chat client responsible for image
+generation can be configured to always use long-running operation mode.
+
+**Pro:** Can be beneficial for scenarios where chat clients need to be configured upfront in accordance with their role in a scenario.
+
+**Con:** Less flexible than the previous option, as it requires configuring the chat client upfront at instantiation time. However, this flexibility might not be needed.
+
+#### 2.4 Combined Approach
+
+This option proposes a combined approach that allows configuration per chat client instance and per `Get{Streaming}ResponseAsync` method invocation.
+
+The chat client will use whichever configuration is provided, whether set in the chat client constructor or in the options for the `Get{Streaming}ResponseAsync`
+method invocation. If both are set, the one provided in the `Get{Streaming}ResponseAsync` method invocation takes precedence.
+
+```csharp
+public class CustomChatClient : IChatClient
+{
+ private readonly bool _enableLongRunningResponses;
+
+ public CustomChatClient(bool enableLongRunningResponses)
+ {
+ this._enableLongRunningResponses = enableLongRunningResponses;
+ }
+
+    public async Task<ChatResponse> GetResponseAsync(string prompt, ChatOptions? options = null, CancellationToken ct = default)
+ {
+ bool enableLongRunningResponses = options?.AllowLongRunningResponses ?? this._enableLongRunningResponses;
+ // Logic to handle the prompt based on enableLongRunningResponses...
+ }
+}
+
+// Consumer code example
+IChatClient chatClient = new CustomChatClient(enableLongRunningResponses: true);
+
+// Start a long-running execution for the prompt
+ChatResponse response = await chatClient.GetResponseAsync("");
+
+// Start a quick prompt
+ChatResponse quickResponse = await chatClient.GetResponseAsync("", new ChatOptions { AllowLongRunningResponses = false });
+```
+
+**Pros:** Flexible approach that combines the benefits of both previous options.
+
+### 3. Getting Status and Result of Long-Running Execution
+
+The explored APIs use different approaches for retrieving the status and results of long-running operations. Some are using
+one method to retrieve both status and result, while others use two separate methods for each operation:
+| Feature | OpenAI Responses | Foundry Agents | A2A |
+|-------------------|-------------------------------|----------------------------------------------------|-----------------------|
+| API to Get Status | GetResponseAsync(responseId) | Runs.GetRunAsync(thread.Id, threadRun.Id) | GetTaskAsync(task.Id) |
+| API to Get Result | GetResponseAsync(responseId) | Messages.GetMessagesAsync(thread.Id, threadRun.Id) | GetTaskAsync(task.Id) |
+
+Taking into account the differences, the following options propose a few ways to model the API for getting the status and result of
+long-running operations for `IChatClient` implementations.
+
+#### 3.1 Two Separate Methods for Status and Result
+
+This option suggests having two separate methods for getting the status and result of long-running operations:
+```csharp
+public interface IAsyncChatClient
+{
+    Task<AsyncRunResult> GetAsyncRunStatusAsync(string runId, CancellationToken ct = default);
+    Task<AsyncRunResult> GetAsyncRunResultAsync(string runId, CancellationToken ct = default);
+}
+```
+
+**Pros:** Could be more intuitive for developers, as it clearly separates the concerns of checking the status and retrieving the result of a long-running operation.
+
+**Cons:** Creates inefficiency for chat clients that use APIs that return both status and result in a single call,
+as callers might make redundant calls to get the result after checking the status that already contains the result.
+
+#### 3.2 One Method to Get Status and Result
+
+This option suggests having a single method for getting both the status and result of long-running operations:
+```csharp
+public interface IAsyncChatClient
+{
+ Task GetAsyncRunResultAsync(string runId, AgentThread? thread = null, CancellationToken ct = default);
+}
+```
+
+This option will redirect the call to the appropriate method of the underlying API that uses one method to retrieve both.
+For APIs that use two separate methods, the method will first get the status and if the status indicates that the
+operation is still running, it will return the status to the caller. If the status indicates that the operation is completed,
+it will then call the method to get the result of the long-running operation and return it together with the status.
+
+**Pros:**
+- Simplifies the API by providing a single, intuitive method for retrieving long-running operation information.
+- More optimal for chat clients that use APIs that return both status and result in a single call, as it avoids unnecessary API calls.
+
+### 4. Place For RunId, Status, and UpdateId of Long-Running Operations
+
+This section considers different options for exposing the `RunId`, `Status`, and `UpdateId` properties of long-running operations.
+
+#### 4.1. As AIContent
+
+The `AsyncRunContent` class will represent a long-running operation initiated and managed by an agent/LLM.
+Items of this content type will be returned in a chat message as part of the `AgentRunResponse` or `ChatResponse`
+response to represent the long-running operation.
+
+The `AsyncRunContent` class has two properties: `RunId` and `Status`. The `RunId` identifies the
+long-running operation, and the `Status` represents the current status of the operation. The class
+inherits from `AIContent`, which is a base class for all AI-related content in MEAI and AF.
+
+The `AsyncRunStatus` class represents the status of a long-running operation. Initially, it will have
+a set of predefined statuses that represent the possible statuses used by existing Agent/LLM APIs that support
+long-running operations. It will be extended to support additional statuses as needed while also
+allowing custom, not-yet-defined statuses to propagate as strings from the underlying API to the callers.
+
+The content class type can be used by both agents and chat clients to represent long-running operations.
+For chat clients to use it, it should be declared in one of the MEAI packages.
+
+```csharp
+public class AsyncRunContent : AIContent
+{
+ public string RunId { get; }
+ public AsyncRunStatus? Status { get; }
+}
+
+public readonly struct AsyncRunStatus : IEquatable
+{
+ public static AsyncRunStatus Queued { get; } = new("Queued");
+ public static AsyncRunStatus InProgress { get; } = new("InProgress");
+ public static AsyncRunStatus Completed { get; } = new("Completed");
+ public static AsyncRunStatus Cancelled { get; } = new("Cancelled");
+ public static AsyncRunStatus Failed { get; } = new("Failed");
+ public static AsyncRunStatus RequiresAction { get; } = new("RequiresAction");
+ public static AsyncRunStatus Expired { get; } = new("Expired");
+ public static AsyncRunStatus Rejected { get; } = new("Rejected");
+ public static AsyncRunStatus AuthRequired { get; } = new("AuthRequired");
+ public static AsyncRunStatus InputRequired { get; } = new("InputRequired");
+ public static AsyncRunStatus Unknown { get; } = new("Unknown");
+
+ public string Label { get; }
+
+ public AsyncRunStatus(string label)
+ {
+ if (string.IsNullOrWhiteSpace(label))
+ {
+ throw new ArgumentException("Label cannot be null or whitespace.", nameof(label));
+ }
+
+ this.Label = label;
+ }
+
+ /// Other members
+}
+```
+
+The streaming API may return an UpdateId identifying a particular update within a streamed response.
+This UpdateId should be available together with RunId to callers, allowing them to resume a long-running operation identified
+by the RunId from the last received update, identified by the UpdateId.
+
+#### 4.2. As Properties Of ChatResponse{Update}
+
+This option suggests adding properties related to long-running operations directly to the `ChatResponse` and `ChatResponseUpdate` classes rather
+than using a separate content class for that. See section "6. Model To Support Long-Running Operations" for more details.
+
+### 5. Streaming Support
+
+All analyzed APIs that support long-running operations also support streaming.
+
+Some of them natively support resuming streaming from a specific point in the stream, while for others, this is either implementation-dependent or needs to be emulated:
+
+| API | Can Resume Streaming | Model |
+|-------------------------|--------------------------------------|------------------------------------------------------------------------------------------------------------|
+| OpenAI Responses | Yes | StreamingResponseUpdate.**SequenceNumber** + GetResponseStreamingAsync(responseId, **startingAfter**, ct) |
+| Azure AI Foundry Agents | Emulated<sup>2</sup>                 | RunStep.**Id** + custom pseudo code: client.Runs.GetRunStepsAsync(...).AllStepsAfter(**stepId**)             |
+| A2A                     | Implementation dependent<sup>1</sup> |                                                                                                              |
+
+<sup>1</sup> The [A2A specification](https://github.com/a2aproject/A2A/blob/main/docs/topics/streaming-and-async.md#1-streaming-with-server-sent-events-sse)
+allows an A2A agent implementation to decide how to handle streaming resumption: _If a client's SSE connection breaks prematurely while
+a task is still active (and the server hasn't sent a final: true event for that phase), the client can attempt to reconnect to the stream using the tasks/resubscribe RPC method.
+The server's behavior regarding missed events during the disconnection period (e.g., whether it backfills or only sends new updates) is implementation-dependent._
+
+<sup>2</sup> The Azure AI Foundry Agents API has an API to start a streaming run but does not have an API to resume streaming from a specific point in the stream.
+However, it has non-streaming APIs to access already started runs, which can be used to emulate streaming resumption by accessing a run and its steps and streaming all the steps after a specific step.
+
+#### Required Changes
+
+To support streaming resumption, the following model changes are required:
+
+- The `ChatOptions` class needs to be extended with a new `StartAfter` property that will identify an update to resume streaming from and to start generating responses after.
+- The `ChatResponseUpdate` class needs to be extended with a new `SequenceNumber` property that will identify the update number within the stream.
+
+All the chat clients supporting the streaming resumption will need to return the `SequenceNumber` property as part of the `ChatResponseUpdate` class and
+honor the `StartAfter` property of the `ChatOptions` class.
+
+#### Function Calling
+
+Function calls over streaming are communicated to chat clients through a series of updates. Chat clients accumulate these updates in their internal state to build
+the function call content once the last update has been received. The completed function call content is then returned to the function-calling chat client,
+which eventually invokes it.
+
+Since chat clients keep function call updates in their internal state, resuming streaming from a specific update can be impossible if the resumption request
+is made using a chat client that does not have the previous updates stored. This situation can occur if a host suspends execution during an ongoing function call
+stream and later resumes from that particular update. Because chat clients' internal state is not persisted, they will lack the prior updates needed to continue
+the function call, leading to a failure in resumption.
+
+To address this issue, chat clients can only return sequence numbers for updates that are resumable. For updates that cannot be resumed from, chat clients can
+return the sequence number of the most recent update received before the non-resumable one. This allows callers to resume from that earlier update,
+even if it means re-processing some updates that have already been handled.
+
+Chat clients will continue returning the sequence number of the last resumable update until a new resumable update becomes available. For example, a chat client might
+keep returning sequence number 2, corresponding to the last resumable update received before an update for the first function call. Once **all** function call updates
+are received and processed, and the model returns a non-function call response, the chat client will then return a sequence number, say 10, which corresponds to the
+first non-function call update.
+
+##### Status of Streaming Updates
+
+Different APIs provide different statuses for streamed function call updates:
+
+Sequence of updates from OpenAI Responses API to answer the question "What time is it?" using a function call:
+| Id | SN | Update.Kind | Response.Status | ChatResponseUpdate.Status | Description |
+|--------|----|--------------------------|-----------------|---------------------------|---------------------------------------------------|
+| resp_1 | 0 | resp.created | Queued | Queued | |
+| resp_1 | 1 | resp.queued | Queued | Queued | |
+| resp_1 | 2 | resp.in_progress | InProgress | InProgress | |
+| resp_1 | 3 | resp.output_item.added | - | InProgress | |
+| resp_1 | 4 | resp.func_call.args.delta| - | InProgress | |
+| resp_1 | 5 | resp.func_call.args.done | - | InProgress | |
+| resp_1 | 6 | resp.output_item.done | - | InProgress | |
+| resp_1 | 7  | resp.completed           | Completed       | Completed                 |                                                   |
+| resp_1 | - | - | - | null | FunctionInvokingChatClient yields function result |
+| | | | OpenAI Responses created a new response to handle function call result |
+| resp_2 | 0 | resp.created | Queued | Queued | |
+| resp_2 | 1 | resp.queued | Queued | Queued | |
+| resp_2 | 2 | resp.in_progress | InProgress | InProgress | |
+| resp_2 | 3 | resp.output_item.added | - | InProgress | |
+| resp_2 | 4 | resp.cnt_part.added | - | InProgress | |
+| resp_2 | 5 | resp.output_text.delta | - | InProgress | |
+| resp_2 | 6 | resp.output_text.delta | - | InProgress | |
+| resp_2 | 7 | resp.output_text.delta | - | InProgress | |
+| resp_2 | 8 | resp.output_text.done | - | InProgress | |
+| resp_2 | 9 | resp.cnt_part.done | - | InProgress | |
+| resp_2 | 10 | resp.output_item.done | - | InProgress | |
+| resp_2 | 11 | resp.completed | Completed | Completed | |
+
+Sequence of updates from Azure AI Foundry Agents API to answer the question "What time is it?" using a function call:
+| Id | SN | UpdateKind | Run.Status | Step.Status | Message.Status | ChatResponseUpdate.Status | Description |
+|--------|---------|-------------------|----------------|-------------|-----------------|---------------------------|---------------------------------------------------|
+| run_1 | - | RunCreated | Queued | - | - | Queued | |
+| run_1 | step_1 | - | RequiredAction | InProgress | - | RequiredAction | |
+| TBD | - | - | - | - | - | - | FunctionInvokingChatClient yields function result |
+| run_1 | - | RunStepCompleted | Completed | - | - | InProgress | |
+| run_1 | - | RunQueued | Queued | - | - | Queued | |
+| run_1 | - | RunInProgress | InProgress | - | - | InProgress | |
+| run_1 | step_2 | RunStepCreated | - | InProgress | - | InProgress | |
+| run_1 | step_2 | RunStepInProgress | - | InProgress | - | InProgress | |
+| run_1 | - | MessageCreated | - | - | InProgress | InProgress | |
+| run_1 | - | MessageInProgress | - | - | InProgress | InProgress | |
+| run_1 | - | MessageUpdated | - | - | - | InProgress | |
+| run_1 | - | MessageUpdated | - | - | - | InProgress | |
+| run_1 | - | MessageUpdated | - | - | - | InProgress | |
+| run_1 | - | MessageCompleted | - | - | Completed | InProgress | |
+| run_1 | step_2 | RunStepCompleted | Completed | - | - | InProgress | |
+| run_1 | - | RunCompleted | Completed | - | - | Completed | |
+
+### 6. Model To Support Long-Running Operations
+
+To support long-running operations, the following values need to be returned by the GetResponseAsync and GetStreamingResponseAsync methods:
+- `ResponseId` - identifier of the long-running operation or an entity representing it, such as a task.
+- `ConversationId` - identifier of the conversation or thread the long-running operation is part of. Some APIs, like Azure AI Foundry Agents, use
+ this identifier together with the ResponseId to identify a run.
+- `SequenceNumber` - identifier of an update within a stream of updates. This is required to support streaming resumption by the GetStreamingResponseAsync method only.
+- `Status` - status of the long-running operation: whether it is queued, running, failed, cancelled, completed, etc.
+
+These values need to be supplied to subsequent calls of the GetResponseAsync and GetStreamingResponseAsync methods to get the status and result of long-running operations.
+
+#### 6.1 ChatOptions
+
+The following options consider different ways of extending the `ChatOptions` class to include the following properties to support long-running operations:
+- `AllowLongRunningResponses` - a boolean property that indicates whether the caller allows the chat client to run in long-running operation mode if it's supported by the chat client.
+- `ResponseId` - a string property that represents the identifier of the long-running operation or an entity representing it. A non-null value of this property would indicate to chat clients
+that callers want to get the status and result of an existing long-running operation, identified by the property value, rather than starting a new one.
+- `StartAfter` - a string property that represents the sequence number of an update within a stream of updates so that the chat client can resume streaming after the last received update.
+
+##### 6.1.1 Direct Properties in ChatOptions
+
+```csharp
+public class ChatOptions
+{
+ // Existing properties...
+ /// Gets or sets an optional identifier used to associate a request with an existing conversation.
+ public string? ConversationId { get; set; }
+ ...
+
+ // New properties...
+ public bool? AllowLongRunningResponses { get; set; }
+ public string? ResponseId { get; set; }
+ public string? StartAfter { get; set; }
+}
+
+// Usage example
+var response = await chatClient.GetResponseAsync("", new ChatOptions { AllowLongRunningResponses = true });
+
+// If the response indicates a long-running operation, get its status and result
+if(response.Status is {} status)
+{
+ response = await chatClient.GetResponseAsync([], new ChatOptions
+ {
+ AllowLongRunningResponses = true,
+ ResponseId = response.ResponseId,
+ ConversationId = response.ConversationId,
+ //StartAfter = response.SequenceNumber // for GetStreamingResponseAsync only
+ });
+}
+
+```
+
+**Con:** Proliferation of long-running operation properties in the `ChatOptions` class.
+
+##### 6.1.2 LongRunOptions Model Class
+
+```csharp
+public class ChatOptions
+{
+ // Existing properties...
+ public string? ConversationId { get; set; }
+ ...
+
+ // New properties...
+ public bool? AllowLongRunningResponses { get; set; }
+
+ public LongRunOptions? LongRunOptions { get; set; }
+}
+
+public class LongRunOptions
+{
+ public string? ResponseId { get; set; }
+ public string? ConversationId { get; set; }
+ public string? StartAfter { get; set; }
+
+ // Alternatively, ChatResponse can have an extension method ToLongRunOptions.
+ public LongRunOptions FromChatResponse(ChatResponse response)
+ {
+ return new LongRunOptions
+ {
+ ResponseId = response.ResponseId,
+ ConversationId = response.ConversationId,
+ };
+ }
+
+ // Alternatively, ChatResponseUpdate can have an extension method ToLongRunOptions.
+ public LongRunOptions FromChatResponseUpdate(ChatResponseUpdate update)
+ {
+ return new LongRunOptions
+ {
+ ResponseId = update.ResponseId,
+ ConversationId = update.ConversationId,
+ StartAfter = update.SequenceNumber,
+ };
+ }
+}
+
+// Usage example
+var response = await chatClient.GetResponseAsync("", new ChatOptions { AllowLongRunningResponses = true });
+
+// If the response indicates a long-running operation, get its status and result
+if(response.Status is {} status)
+{
+ while(status != ResponseStatus.Completed)
+ {
+ response = await chatClient.GetResponseAsync([], new ChatOptions
+ {
+ AllowLongRunningResponses = true,
+ LongRunOptions = LongRunOptions.FromChatResponse(response)
+ // or extension method
+ LongRunOptions = response.ToLongRunOptions()
+ // or implicit conversion
+ LongRunOptions = response
+ });
+ }
+}
+```
+
+**Pro:** No proliferation of long-running operation properties in the `ChatOptions` class.
+
+**Con:** Duplicated property `ConversationId`.
+
+##### 6.1.3 Continuation Token of System.ClientModel.ContinuationToken Type
+
+This option suggests using `System.ClientModel.ContinuationToken` to encapsulate all properties required for long-running operations.
+The continuation token will be returned by chat clients as part of the `ChatResponse` and `ChatResponseUpdate` responses to indicate that
+the response is part of a long-running execution. A null value of the property will indicate that the response is not part of a long-running execution.
+Chat clients will accept a non-null value of the property to indicate that callers want to get the status and result of an existing long-running operation.
+
+Each chat client will implement its own continuation token class that inherits from `ContinuationToken` to encapsulate properties required for long-running operations
+that are specific to the underlying API the chat client works with. For example, for the OpenAI Responses API, the continuation token class will encapsulate
+the `ResponseId` and `SequenceNumber` properties.
+
+```csharp
+public class ChatOptions
+{
+ // Existing properties...
+ public string? ConversationId { get; set; }
+ ...
+
+ // New properties...
+ public bool? AllowLongRunningResponses { get; set; }
+
+ public ContinuationToken? ContinuationToken { get; set; }
+}
+
+internal sealed class LongRunContinuationToken : ContinuationToken
+{
+ public LongRunContinuationToken(string responseId)
+ {
+ this.ResponseId = responseId;
+ }
+
+ public string ResponseId { get; set; }
+
+ public int? SequenceNumber { get; set; }
+
+ public static LongRunContinuationToken FromToken(ContinuationToken token)
+ {
+ if (token is LongRunContinuationToken longRunContinuationToken)
+ {
+ return longRunContinuationToken;
+ }
+
+ BinaryData data = token.ToBytes();
+
+ Utf8JsonReader reader = new(data);
+
+ string responseId = null!;
+ int? startAfter = null;
+
+ reader.Read();
+
+ // Reading functionality
+
+ return new(responseId)
+ {
+ SequenceNumber = startAfter
+ };
+ }
+}
+
+// Usage example
+ChatOptions options = new() { AllowLongRunningResponses = true };
+
+var response = await chatClient.GetResponseAsync("", options);
+
+while (response.ContinuationToken is { } token)
+{
+ options.ContinuationToken = token;
+
+ response = await chatClient.GetResponseAsync([], options);
+}
+
+Console.WriteLine(response.Text);
+```
+
+**Pro:** No proliferation of long-running operation properties in the `ChatOptions` class, including the `Status` property.
+
+##### 6.1.4 Continuation Token of String Type
+
+This option is similar to the previous one but suggests using a string type for the continuation token instead of the `System.ClientModel.ContinuationToken` type.
+
+```csharp
+internal sealed class LongRunContinuationToken
+{
+ public LongRunContinuationToken(string responseId)
+ {
+ this.ResponseId = responseId;
+ }
+
+ public string ResponseId { get; set; }
+
+ public int? SequenceNumber { get; set; }
+
+ public static LongRunContinuationToken Deserialize(string json)
+ {
+ Throw.IfNullOrEmpty(json);
+
+ var token = JsonSerializer.Deserialize(json, OpenAIJsonContext2.Default.LongRunContinuationToken)
+ ?? throw new InvalidOperationException("Failed to deserialize LongRunContinuationToken.");
+
+ return token;
+ }
+
+ public string Serialize()
+ {
+ return JsonSerializer.Serialize(this, OpenAIJsonContext2.Default.LongRunContinuationToken);
+ }
+}
+
+public class ChatOptions
+{
+ public string? ContinuationToken { get; set; }
+}
+```
+
+**Pro:** No dependency on the `System.ClientModel` package.
+
+##### 6.1.5 Continuation Token of a Custom Type
+
+This option is similar to the "6.1.3 Continuation Token of System.ClientModel.ContinuationToken Type" option but suggests using a
+custom type for the continuation token instead of the `System.ClientModel.ContinuationToken` type.
+
+**Pros**
+- There is no dependency on the `System.ClientModel` package.
+- There is no ambiguity between extension methods for `IChatClient` that would occur if a new extension method, which accepts a continuation token of string type as the first parameter, is added.
+
+#### 6.2 Overloads of GetResponseAsync and GetStreamingResponseAsync
+
+This option proposes introducing overloads of the `GetResponseAsync` and `GetStreamingResponseAsync` methods that will accept long-running operation parameters directly:
+
+```csharp
+public interface ILongRunningChatClient
+{
+ Task GetResponseAsync(
+ IEnumerable messages,
+ string responseId,
+ ChatOptions? options = null,
+ CancellationToken cancellationToken = default);
+
+ IAsyncEnumerable GetStreamingResponseAsync(
+ IEnumerable messages,
+ string responseId,
+ string? startAfter = null,
+ ChatOptions? options = null,
+ CancellationToken cancellationToken = default);
+}
+
+public class CustomChatClient : IChatClient, ILongRunningChatClient
+{
+ ...
+}
+
+// Usage example
+IChatClient chatClient = ...; // Get an instance of IChatClient
+
+ChatResponse response = await chatClient.GetResponseAsync("", new ChatOptions { AllowLongRunningResponses = true });
+
+if(response.Status is {} status && chatClient.GetService() is {} longRunningChatClient)
+{
+ while(status != AsyncRunStatus.Completed)
+ {
+ response = await longRunningChatClient.GetResponseAsync([], response.ResponseId, new ChatOptions { ConversationId = response.ConversationId });
+ }
+ ...
+}
+
+```
+
+**Pros:**
+- No proliferation of long-running operation properties in the ChatOptions class, except for the new AllowLongRunningResponses property discussed in section 2.
+
+**Cons:**
+- Interface switching: Callers need to switch to the `ILongRunningChatClient` interface to get the status and result of long-running operations.
+- An alternative solution for decorating the new methods will have to be put in place.
+
+## Long-Running Operations Support for AF Agents
+
+### 1. Methods for Working with Long-Running Operations
+
+The design for supporting long-running operations by agents is very similar to that for chat clients because it is based on
+the same analysis of existing APIs and anticipated consumption patterns.
+
+#### 1.1 Run{Streaming}Async Methods for Common Operations and the Update Operation + New Method Per Uncommon Operation
+
+This option suggests using the existing `Run{Streaming}Async` methods of the `AIAgent` interface implementations to start, get results, and update long-running operations.
+
+For cancellation and deletion of long-running operations, new methods will be added to the `AIAgent` interface implementations.
+
+```csharp
+public abstract class AIAgent
+{
+ // Existing methods...
+ public Task RunAsync(string message, AgentThread? thread = null, AgentRunOptions? options = null, CancellationToken cancellationToken = default) { ... }
+ public IAsyncEnumerable RunStreamingAsync(string message, AgentThread? thread = null, AgentRunOptions? options = null, CancellationToken cancellationToken = default) { ... }
+
+ // New methods for uncommon operations
+ public virtual Task CancelRunAsync(string id, AgentCancelRunOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ return Task.FromResult(null);
+ }
+
+ public virtual Task DeleteRunAsync(string id, AgentDeleteRunOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ return Task.FromResult(null);
+ }
+}
+
+// Agent that supports update and cancellation
+public class CustomAgent : AIAgent
+{
+ public override async Task CancelRunAsync(string id, AgentCancelRunOptions? options = null, CancellationToken cancellationToken = default)
+ {
+ var response = await this._client.CancelRunAsync(id, options?.Thread?.ConversationId);
+
+ return ConvertToAgentRunResponse(response);
+ }
+
+ // No overload for DeleteRunAsync as it's not supported by the underlying API
+}
+
+// Usage
+AIAgent agent = new CustomAgent();
+
+AgentThread thread = agent.GetNewThread();
+
+AgentRunResponse response = await agent.RunAsync("What is the capital of France?");
+
+response = await agent.CancelRunAsync(response.ResponseId, new AgentCancelRunOptions { Thread = thread });
+```
+
+In case an agent supports either or both cancellation and deletion of long-running operations, it will override the corresponding methods.
+Otherwise, it won't override them, and the base implementations will return null by default.
+
+Some agents, for example Azure AI Foundry Agents, require the thread identifier to cancel a run. To accommodate this requirement, the `CancelRunAsync` method
+accepts an optional `AgentCancelRunOptions` parameter that allows callers to specify the thread associated with the run they want to cancel.
+
+```csharp
+public class AgentCancelRunOptions
+{
+ public AgentThread? Thread { get; set; }
+}
+```
+
+Similar design considerations can be applied to the `DeleteRunAsync` method and the `AgentDeleteRunOptions` class.
+
+Having options in the method signatures allows for future extensibility; however, they can be added later if needed to the method overloads.
+
+**Pros:**
+- Existing `Run{Streaming}Async` methods are reused for common operations.
+- New methods for uncommon operations can be added in a non-breaking way.
+
+### 2. Enabling Long-Running Operations
+
+The options for enabling long-running operations are exactly the same as those discussed in section "2. Enabling Long-Running Operations" for chat clients:
+- Execution Mode per `Run{Streaming}Async` Invocation
+- Execution Mode per `Run{Streaming}Async` Invocation + Model Class
+- Execution Mode per agent instance
+- Combined Approach
+
+Below are the details of the option selected for chat clients that is also selected for agents.
+
+#### 2.1 Execution Mode per `Run{Streaming}Async` Invocation
+
+This option proposes adding a new nullable `AllowLongRunningResponses` property of bool type to the `AgentRunOptions` class.
+The property value will be `true` if the caller requests a long-running operation, `false`, `null` or omitted otherwise.
+
+AI agents that work with APIs requiring explicit configuration per operation will use this property to determine whether to run the prompt as a long-running
+operation or quick prompt. Agents that work with APIs that don't require explicit configuration will ignore this property and operate according
+to their own logic/configuration.
+
+```csharp
+public class AgentRunOptions
+{
+ // Existing properties...
+ public bool? AllowLongRunningResponses { get; set; }
+}
+
+// Consumer code example
+AIAgent agent = ...; // Get an instance of an AIAgent
+
+// Start a long-running execution for the prompt if supported by the underlying API
+AgentRunResponse response = await agent.RunAsync("", new AgentRunOptions { AllowLongRunningResponses = true });
+
+// Start a quick prompt
+AgentRunResponse response = await agent.RunAsync("");
+```
+
+**Pros:**
+- Callers can switch between quick prompts and long-running operations per invocation of the `Run{Streaming}Async` methods without
+changing agent configuration.
+- Enables explicit control over the execution mode by callers per invocation, meaning that no caller site is broken if the agent is injected via DI,
+and the caller can turn on the long-running operation mode when it can handle it.
+
+**Con:** This may not be valuable for all callers, as they may not have enough information to decide whether the prompt should run as a long-running operation or quick prompt.
+
+### 3. Model To Support Long-Running Operations
+
+The options for modeling long-running operations are exactly the same as those for chat clients discussed in section "6. Model To Support Long-Running Operations" above:
+- Direct Properties in ChatOptions
+- LongRunOptions Model Class
+- Continuation Token of System.ClientModel.ContinuationToken Type
+- Continuation Token of String Type
+- Continuation Token of a Custom Type
+
+Below are the details of the option selected for chat clients that is also selected for agents.
+
+#### 3.1 Continuation Token of a Custom Type
+
+This option suggests using `ContinuationToken` to encapsulate all properties representing a long-running operation. The continuation token will be returned by agents in the
+`ContinuationToken` property of the `AgentRunResponse` and `AgentRunResponseUpdate` responses to indicate that the response is part of a long-running operation. A null value
+of the property will indicate that the response is not part of a long-running operation or the long-running operation has been completed. Callers will set the token in the
+`ContinuationToken` property of the `AgentRunOptions` class in follow-up calls to the `Run{Streaming}Async` methods to indicate that they want to "continue" the long-running
+operation identified by the token.
+
+Each agent will implement its own continuation token class that inherits from `ContinuationToken` to encapsulate properties required for long-running operations that are
+specific to the underlying API the agent works with. For example, for the A2A agent, the continuation token class will encapsulate the `TaskId` property.
+
+```csharp
+internal sealed class A2AAgentContinuationToken : ResponseContinuationToken
+{
+ public A2AAgentContinuationToken(string taskId)
+ {
+ this.TaskId = taskId;
+ }
+
+ public string TaskId { get; set; }
+
+ public static LongRunContinuationToken FromToken(ContinuationToken token)
+ {
+ if (token is LongRunContinuationToken longRunContinuationToken)
+ {
+ return longRunContinuationToken;
+ }
+
+ ... // Deserialization logic
+ }
+}
+
+public class AgentRunOptions
+{
+ public ResponseContinuationToken? ContinuationToken { get; set; }
+}
+
+public class AgentRunResponse
+{
+ public ResponseContinuationToken? ContinuationToken { get; }
+}
+
+public class AgentRunResponseUpdate
+{
+ public ResponseContinuationToken? ContinuationToken { get; }
+}
+
+// Usage example
+AgentRunResponse response = await agent.RunAsync("What is the capital of France?");
+
+AgentRunOptions options = new() { ContinuationToken = response.ContinuationToken };
+
+while (response.ContinuationToken is { } token)
+{
+ options.ContinuationToken = token;
+ response = await agent.RunAsync([], options);
+}
+
+Console.WriteLine(response.Text);
+```
+
+### 4. Continuation Token and Agent Thread
+
+There are two types of agent threads: server-managed and client-managed. The server-managed threads live server-side and are identified by a conversation identifier, and
+agents use the identifier to associate runs with the threads. The client-managed threads live client-side and are represented by a collection of chat messages that agents maintain
+by adding user messages to them before sending the thread to the service and by adding the agent response back to the thread when received from the service.
+
+When long-running operations are enabled and an agent is configured with tools, the initial run response may contain a tool call that needs to be invoked by the agent. If the agent runs
+with a server-managed thread, the tool call will be captured as part of the conversation history server-side and follow-up runs will have access to it, and as a result the agent will invoke the tool.
+However, if no thread is provided at the agent's initial run and a client-managed thread is provided for follow-up runs and the agent calls a tool, the tool call which the agent made
+at the initial run will not be added to the client-managed thread since the initial run was made with no thread, and as a result the agent will not be able to invoke the tool.
+
+#### 4.1 Require Thread for Long-Running Operations
+
+This option suggests that AI agents require a thread to be provided when long-running operations are enabled. If no thread is provided, the agent will throw an exception.
+
+**Pro:** Ensures agent responses are always captured by client-managed threads when long-running operations are enabled, providing a consistent experience for callers.
+
+**Con:** May be inconvenient for callers to always provide a thread when long-running operations are enabled.
+
+#### 4.2 Don't Require Thread for Long-Running Operations
+
+This option suggests that AI agents don't require a thread to be provided when long-running operations are enabled. According to this option, it's up to the caller to ensure that
+a thread is provided consistently for all runs when background operations are used.
+
+**Pro:** Provides more flexibility to callers by not enforcing thread requirements.
+
+**Con:** May lead to an inconsistent experience for callers if they forget to provide the thread for initial or follow-up runs.
+
+## Decision Outcome
+
+### Long-Running Execution Support for Chat Clients
+- **Methods**: Option 1.4 - Use existing `Get{Streaming}ResponseAsync` for common operations; individual interfaces for uncommon operations (e.g., `ICancelableChatClient`)
+- **Enabling**: Option 2.1 - Execution mode per invocation via `ChatOptions.AllowLongRunningResponses`
+- **Status/Result**: Option 3.2 - Single method to get both status and result
+- **RunId/UpdateId**: Option 4.2 - As properties of `ChatResponse{Update}`
+- **Model**: Option 6.1.5 - Custom continuation token type
+
+### Long-Running Operations Support for AF Agents
+- **Methods**: Option 1.1 - Use existing `Run{Streaming}Async` for common operations; new methods for uncommon operations
+- **Enabling**: Option 2.1 - Execution mode per invocation via `AgentRunOptions.AllowLongRunningResponses`
+- **Model**: Option 3.1 - Custom continuation token type
+- **Thread Requirement**: Option 4.1 - Require thread for long-running operations
+
+## Addendum 1: APIs of Agents Supporting Long-Running Execution
+
+OpenAI Responses
+
+- Create a background response and wait for it to complete using polling:
+ ```csharp
+ ClientResult result = await this._openAIResponseClient.CreateResponseAsync("What is SLM in AI?", new ResponseCreationOptions
+ {
+ Background = true,
+ });
+
+ // InProgress, Completed, Cancelled, Queued, Incomplete, Failed
+ while (result.Value.Status is (ResponseStatus.Queued or ResponseStatus.InProgress))
+ {
+ await Task.Delay(500); // Wait for 0.5 seconds before checking the status again
+ result = await this._openAIResponseClient.GetResponseAsync(result.Value.Id);
+ }
+
+ Console.WriteLine($"Response Status: {result.Value.Status}"); // Completed
+ Console.WriteLine(result.Value.GetOutputText()); // SLM in the context of AI refers to ...
+ ```
+
+- Cancel a background response:
+ ```csharp
+ ...
+ ClientResult result = await this._openAIResponseClient.CreateResponseAsync("What is SLM in AI?", new ResponseCreationOptions
+ {
+ Background = true,
+ });
+
+ result = await this._openAIResponseClient.CancelResponseAsync(result.Value.Id);
+
+ Console.WriteLine($"Response Status: {result.Value.Status}"); // Cancelled
+ ```
+
+- Delete a background response:
+ ```csharp
+ ClientResult result = await this._openAIResponseClient.CreateResponseAsync("What is SLM in AI?", new ResponseCreationOptions
+ {
+ Background = true,
+ });
+
+ ClientResult deleteResult = await this._openAIResponseClient.DeleteResponseAsync(result.Value.Id);
+
+ Console.WriteLine($"Response Deleted: {deleteResult.Value.Deleted}"); // True if the response was deleted successfully
+ ```
+
+- Streaming a background response
+ ```csharp
+ await foreach (StreamingResponseUpdate update in this._openAIResponseClient.CreateResponseStreamingAsync("What is SLM in AI?", new ResponseCreationOptions { Background = true }))
+ {
+ Console.WriteLine($"Sequence Number: {update.SequenceNumber}"); // 0, 1, 2, etc.
+
+ switch (update)
+ {
+ case StreamingResponseCreatedUpdate createdUpdate:
+ Console.WriteLine($"Response Status: {createdUpdate.Response.Status}"); // Queued
+ break;
+ case StreamingResponseQueuedUpdate queuedUpdate:
+ Console.WriteLine($"Response Status: {queuedUpdate.Response.Status}"); // Queued
+ break;
+ case StreamingResponseInProgressUpdate inProgressUpdate:
+ Console.WriteLine($"Response Status: {inProgressUpdate.Response.Status}"); // InProgress
+ break;
+ case StreamingResponseOutputItemAddedUpdate outputItemAddedUpdate:
+ Console.WriteLine($"Output index: {outputItemAddedUpdate.OutputIndex}");
+ Console.WriteLine($"Item Id: {outputItemAddedUpdate.Item.Id}");
+ break;
+ case StreamingResponseContentPartAddedUpdate contentPartAddedUpdate:
+ Console.WriteLine($"Output Index: {contentPartAddedUpdate.OutputIndex}");
+ Console.WriteLine($"Item Id: {contentPartAddedUpdate.ItemId}");
+ Console.WriteLine($"Content Index: {contentPartAddedUpdate.ContentIndex}");
+ break;
+ case StreamingResponseOutputTextDeltaUpdate outputTextDeltaUpdate:
+ Console.WriteLine($"Output Index: {outputTextDeltaUpdate.OutputIndex}");
+ Console.WriteLine($"Item Id: {outputTextDeltaUpdate.ItemId}");
+ Console.WriteLine($"Content Index: {outputTextDeltaUpdate.ContentIndex}");
+ Console.WriteLine($"Delta: {outputTextDeltaUpdate.Delta}"); // SL>M> in> AI> typically>....
+ break;
+ case StreamingResponseOutputTextDoneUpdate outputTextDoneUpdate:
+ Console.WriteLine($"Output Index: {outputTextDoneUpdate.OutputIndex}");
+ Console.WriteLine($"Item Id: {outputTextDoneUpdate.ItemId}");
+ Console.WriteLine($"Content Index: {outputTextDoneUpdate.ContentIndex}");
+ Console.WriteLine($"Text: {outputTextDoneUpdate.Text}"); // SLM in the context of AI typically refers to ...
+ break;
+ case StreamingResponseContentPartDoneUpdate contentPartDoneUpdate:
+ Console.WriteLine($"Output Index: {contentPartDoneUpdate.OutputIndex}");
+ Console.WriteLine($"Item Id: {contentPartDoneUpdate.ItemId}");
+ Console.WriteLine($"Content Index: {contentPartDoneUpdate.ContentIndex}");
+ Console.WriteLine($"Text: {contentPartDoneUpdate.Part.Text}"); // SLM in the context of AI typically refers to ...
+ break;
+ case StreamingResponseOutputItemDoneUpdate outputItemDoneUpdate:
+ Console.WriteLine($"Output Index: {outputItemDoneUpdate.OutputIndex}");
+ Console.WriteLine($"Item Id: {outputItemDoneUpdate.Item.Id}");
+ break;
+ case StreamingResponseCompletedUpdate completedUpdate:
+ Console.WriteLine($"Response Status: {completedUpdate.Response.Status}"); // Completed
+ Console.WriteLine($"Output: {completedUpdate.Response.GetOutputText()}"); // SLM in the context of AI typically refers to ...
+ break;
+ default:
+ Console.WriteLine($"Unexpected update type: {update.GetType().Name}");
+ break;
+ }
+ }
+ ```
+
+ Docs: [OpenAI background mode](https://platform.openai.com/docs/guides/background)
+
+- Background Mode Disabled
+
+ - Non-streaming API - returns the final result
+ | Method Call | Status | Result | Notes |
+ |-------------------------------------|-----------|---------------------------------|-------------------------------------|
+ | CreateResponseAsync(msgs, opts, ct) | Completed | The capital of France is Paris. | |
+ | GetResponseAsync(responseId, ct) | Completed | The capital of France is Paris. | response is less than 5 minutes old |
+ | GetResponseAsync(responseId, ct) | Completed | The capital of France is Paris. | response is more than 5 minutes old |
+ | GetResponseAsync(responseId, ct) | Completed | The capital of France is Paris. | response is more than 12 hours old |
+
+ | Cancellation Method | Result |
+ |---------------------|--------------------------------------|
+ | CancelResponseAsync | Cannot cancel a synchronous response |
+
+ - Streaming API - returns streaming updates callers can iterate over to get the result
+ | Method Call | Status | Result |
+ |----------------------------------------------|------------|----------------------------------------------------------------------------------|
+ | CreateResponseStreamingAsync(msgs, opts, ct) | - | updates |
+ | Iterating over updates | InProgress | - |
+ | Iterating over updates | InProgress | - |
+ | Iterating over updates | InProgress | The |
+ | Iterating over updates | InProgress | capital |
+ | Iterating over updates | InProgress | ... |
+ | Iterating over updates | InProgress | Paris. |
+ | Iterating over updates | Completed | The capital of France is Paris. |
+ | GetStreamingResponseAsync(responseId, ct) | - | HTTP 400 - Response cannot be streamed, it was not created with background=true. |
+
+ | Cancellation Method | Result |
+ |---------------------|--------------------------------------|
+ | CancelResponseAsync | Cannot cancel a synchronous response |
+
+- Background Mode Enabled
+
+ - Non-streaming API - returns a queued response immediately and allows polling for the status and result
+ | Method Call | Status | Result | Notes |
+ |-------------------------------------|-----------|---------------------------------|--------------------------------------------|
+ | CreateResponseAsync(msgs, opts, ct) | Queued | responseId | |
+ | GetResponseAsync(responseId, ct) | Queued | - | if called before the response is completed |
+ | GetResponseAsync(responseId, ct) | Queued | - | if called before the response is completed |
+ | GetResponseAsync(responseId, ct) | Completed | The capital of France is Paris. | response is less than 5 minutes old |
+ | GetResponseAsync(responseId, ct) | Completed | The capital of France is Paris. | response is more than 5 minutes old |
+ | GetResponseAsync(responseId, ct) | Completed | The capital of France is Paris. | response is more than 12 hours old |
+
+ The response started in background mode runs server-side until it completes, fails, or is cancelled. The client can poll for
+ the status of the response using its Id. If the client polls before the response is completed, it will get the latest status of the response.
+ If the client polls after the response is completed, it will get the completed response with the result.
+
+ | Cancellation Method | Result | Notes |
+ |---------------------|-----------|----------------------------------------|
+ | CancelResponseAsync | Cancelled | if cancelled before response completed |
+ | CancelResponseAsync | Completed | if cancelled after response completed |
+ | CancellationToken | No effect | it just cancels the client side call |
+
+ - Streaming API - returns streaming updates callers can iterate over immediately or after dropping the stream and picking it up later
+ | Method Call | Status | Result | Notes |
+ |----------------------------------------------|------------|--------------------------------------------------------------------------------|-------------------------------------------|
+ | CreateResponseStreamingAsync(msgs, opts, ct) | - | updates | |
+ | Iterating over updates | Queued | - | |
+ | Iterating over updates | Queued | - | |
+ | Iterating over updates | InProgress | - | |
+ | Iterating over updates | InProgress | - | |
+ | Iterating over updates | InProgress | The | |
+ | Iterating over updates | InProgress | capital | |
+ | Iterating over updates | InProgress | ... | |
+ | Iterating over updates | InProgress | Paris. | |
+ | Iterating over updates | Completed | The capital of France is Paris. | |
+ | GetStreamingResponseAsync(responseId, ct) | - | updates | response is less than 5 minutes old |
+ | Iterating over updates | Queued | - | |
+ | ... | ... | ... | |
+ | GetStreamingResponseAsync(responseId, ct) | - | HTTP 400 - Response can no longer be streamed, it is more than 5 minutes old. | response is more than 5 minutes old |
+ | GetResponseAsync(responseId, ct) | Completed | The capital of France is Paris. | accessing response that can't be streamed |
+
+ The streamed response that is not available after 5 minutes can be retrieved using the non-streaming API `GetResponseAsync`.
+
+ | Cancellation Method | Result | Notes |
+ |---------------------|------------------------------------|----------------------------------------|
+ | CancelResponseAsync | Canceled<sup>1</sup> | if cancelled before response completed |
+ | CancelResponseAsync | Cannot cancel a completed response | if cancelled after response completed |
+ | CancellationToken | No effect | it just cancels the client side call |
+
+ <sup>1</sup> The CancelResponseAsync method returns `Canceled` status, but a subsequent call to GetStreamingResponseAsync returns
+ an enumerable that can be iterated over to get the rest of the response until it completes.
+
+
+
+
+Azure AI Foundry Agents
+
+- Create a thread and run the agent against it and wait for it to complete using polling:
+ ```csharp
+ // Create a thread with a message.
+ ThreadMessageOptions options = new(MessageRole.User, "What is SLM in AI?");
+ thread = await this._persistentAgentsClient!.Threads.CreateThreadAsync([options]);
+
+ // Run the agent on the thread.
+ ThreadRun threadRun = await this._persistentAgentsClient.Runs.CreateRunAsync(thread.Id, agent.Id);
+
+ // Poll for the run status.
+ // InProgress, Completed, Cancelling, Cancelled, Queued, Failed, RequiresAction, Expired
+ while (threadRun.Status == RunStatus.InProgress || threadRun.Status == RunStatus.Queued)
+ {
+ threadRun = await this._persistentAgentsClient.Runs.GetRunAsync(thread.Id, threadRun.Id);
+ }
+
+ // Access the run result.
+ await foreach (PersistentThreadMessage msg in this._persistentAgentsClient.Messages.GetMessagesAsync(thread.Id, threadRun.Id))
+ {
+ foreach (MessageContent content in msg.ContentItems)
+ {
+ switch (content)
+ {
+ case MessageTextContent textItem:
+ Console.WriteLine($" Text: {textItem.Text}");
+ //M1: In the context of Artificial Intelligence (AI), **SLM** often ...
+ //M2: What is SLM in AI?
+ break;
+ }
+ }
+ }
+ ```
+
+- Cancel an agent run:
+ ```csharp
+ // Create a thread with a message.
+ ThreadMessageOptions options = new(MessageRole.User, "What is SLM in AI?");
+ thread = await this._persistentAgentsClient!.Threads.CreateThreadAsync([options]);
+
+ // Run the agent on the thread.
+ ThreadRun threadRun = await this._persistentAgentsClient.Runs.CreateRunAsync(thread.Id, agent.Id);
+
+ Response cancellationResponse = await this._persistentAgentsClient.Runs.CancelRunAsync(thread.Id, threadRun.Id);
+ ```
+
+- Other agent run operations:
+ GetRunStepAsync
+
+
+
+
+A2A Agents
+
+- Send message to agent and handle the response
+ ```csharp
+ // Send message to the A2A agent.
+ A2AResponse response = await this.Client.SendMessageAsync(messageSendParams, cancellationToken).ConfigureAwait(false);
+
+ // Handle task responses.
+ if (response is AgentTask task)
+ {
+ while (task.Status.State == TaskState.Working)
+ {
+ task = await this.Client.GetTaskAsync(task.Id, cancellationToken).ConfigureAwait(false);
+ }
+
+ if (task.Artifacts != null && task.Artifacts.Count > 0)
+ {
+ foreach (var artifact in task.Artifacts)
+ {
+ foreach (var part in artifact.Parts)
+ {
+ if (part is TextPart textPart)
+ {
+ Console.WriteLine($"Result: {textPart.Text}");
+ }
+ }
+ }
+ Console.WriteLine();
+ }
+ }
+ // Handle message responses.
+ else if (response is Message message)
+ {
+ foreach (var part in message.Parts)
+ {
+ if (part is TextPart textPart)
+ {
+ Console.WriteLine($"Result: {textPart.Text}");
+ }
+ }
+ }
+ else
+ {
+ throw new InvalidOperationException("Unexpected response type from A2A client.");
+ }
+ ```
+
+- Cancel task
+ ```csharp
+ // Send message to the A2A agent.
+ A2AResponse response = await this.Client.SendMessageAsync(messageSendParams, cancellationToken).ConfigureAwait(false);
+
+ // Cancel the task
+ if (response is AgentTask task)
+ {
+ await this.Client.CancelTaskAsync(new TaskIdParams() { Id = task.Id }, cancellationToken).ConfigureAwait(false);
+ }
+ ```
+
+
\ No newline at end of file
diff --git a/docs/design/python-package-setup.md b/docs/design/python-package-setup.md
index 6aeb37e35f..1c7afbad1a 100644
--- a/docs/design/python-package-setup.md
+++ b/docs/design/python-package-setup.md
@@ -175,7 +175,7 @@ Sub-packages are comprised of two parts, the code itself and the dependencies, t
- Subpackage naming should also follow this, so in principle a package name is `-`, so `google-gemini`, `azure-purview`, `microsoft-copilotstudio`, etc. For smaller vendors, where it's less likely to have a multitude of connectors, we can skip the feature/brand part, so `mem0`, `redis`, etc.
- For Microsoft services we will have two vendor folders, `azure` and `microsoft`, where `azure` contains all Azure services, while `microsoft` contains other Microsoft services, such as Copilot Studio Agents.
-This setup was discussed at length and the decision is captured in [ADR-0007](../decisions/0007-python-subpackages.md).
+This setup was discussed at length and the decision is captured in [ADR-0008](../decisions/0008-python-subpackages.md).
#### Evolving the package structure
For each of the advanced components, we have two reason why we may split them into a folder, with an `__init__.py` and optionally a `_files.py`:
diff --git a/dotnet/agent-framework-dotnet.slnx b/dotnet/agent-framework-dotnet.slnx
index 1915050ac3..4901fa3eec 100644
--- a/dotnet/agent-framework-dotnet.slnx
+++ b/dotnet/agent-framework-dotnet.slnx
@@ -57,6 +57,7 @@
+
@@ -162,8 +163,14 @@
-
-
+
+
+
+
+
+
+
+
diff --git a/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/Agent_Step17_BackgroundResponses.csproj b/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/Agent_Step17_BackgroundResponses.csproj
new file mode 100644
index 0000000000..c5b2ae56a6
--- /dev/null
+++ b/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/Agent_Step17_BackgroundResponses.csproj
@@ -0,0 +1,20 @@
+
+
+
+ Exe
+ net9.0
+
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/Program.cs b/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/Program.cs
new file mode 100644
index 0000000000..bae2a2ab93
--- /dev/null
+++ b/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/Program.cs
@@ -0,0 +1,70 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+// This sample shows how to use background responses with ChatClientAgent and OpenAI Responses.
+
+using Azure.AI.OpenAI;
+using Azure.Identity;
+using Microsoft.Agents.AI;
+using OpenAI;
+
+var endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException("AZURE_OPENAI_ENDPOINT is not set.");
+var deploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT_NAME") ?? "gpt-4o-mini";
+
+AIAgent agent = new AzureOpenAIClient(
+ new Uri(endpoint),
+ new AzureCliCredential())
+ .GetOpenAIResponseClient(deploymentName)
+ .CreateAIAgent(instructions: "You are good at telling jokes.", name: "Joker");
+
+// Enable background responses (only supported by OpenAI Responses at this time).
+AgentRunOptions options = new() { AllowBackgroundResponses = true };
+
+AgentThread thread = agent.GetNewThread();
+
+// Start the initial run.
+AgentRunResponse response = await agent.RunAsync("Tell me a joke about a pirate.", thread, options);
+
+// Poll until the response is complete.
+while (response.ContinuationToken is { } token)
+{
+ // Wait before polling again.
+ await Task.Delay(TimeSpan.FromSeconds(2));
+
+ // Continue with the token.
+ options.ContinuationToken = token;
+
+ response = await agent.RunAsync(thread, options);
+}
+
+// Display the result.
+Console.WriteLine(response.Text);
+
+// Reset options and thread for streaming.
+options = new() { AllowBackgroundResponses = true };
+thread = agent.GetNewThread();
+
+AgentRunResponseUpdate? lastReceivedUpdate = null;
+// Start streaming.
+await foreach (AgentRunResponseUpdate update in agent.RunStreamingAsync("Tell me a joke about a pirate.", thread, options))
+{
+ // Output each update.
+ Console.Write(update.Text);
+
+ // Track last update.
+ lastReceivedUpdate = update;
+
+ // Simulate connection loss after first piece of content received.
+ if (update.Text.Length > 0)
+ {
+ break;
+ }
+}
+
+// Resume from interruption point.
+options.ContinuationToken = lastReceivedUpdate?.ContinuationToken;
+
+await foreach (AgentRunResponseUpdate update in agent.RunStreamingAsync(thread, options))
+{
+ // Output each update.
+ Console.Write(update.Text);
+}
diff --git a/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/README.md b/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/README.md
new file mode 100644
index 0000000000..fa229ae439
--- /dev/null
+++ b/dotnet/samples/GettingStarted/Agents/Agent_Step17_BackgroundResponses/README.md
@@ -0,0 +1,22 @@
+# What This Sample Shows
+
+This sample demonstrates how to use background responses with ChatClientAgent and OpenAI Responses for long-running operations. Background responses support:
+
+- **Polling for completion** - Non-streaming APIs can start a background operation and return a continuation token. Poll with the token until the response completes.
+- **Resuming after interruption** - Streaming APIs can be interrupted and resumed from the last update using the continuation token.
+
+> **Note:** Background responses are currently only supported by OpenAI Responses.
+
+# Prerequisites
+
+Before you begin, ensure you have the following prerequisites:
+
+- .NET 9.0 SDK or later
+- An Azure OpenAI resource (the sample authenticates with AzureCliCredential, so sign in with the Azure CLI first)
+
+Set the following environment variables:
+
+```powershell
+$env:AZURE_OPENAI_ENDPOINT="https://your-resource.openai.azure.com/" # Replace with your Azure OpenAI resource endpoint
+$env:AZURE_OPENAI_DEPLOYMENT_NAME="gpt-4o-mini" # Optional, defaults to gpt-4o-mini
+```
\ No newline at end of file
diff --git a/dotnet/samples/GettingStarted/Agents/README.md b/dotnet/samples/GettingStarted/Agents/README.md
index 31290f9637..c306c8066a 100644
--- a/dotnet/samples/GettingStarted/Agents/README.md
+++ b/dotnet/samples/GettingStarted/Agents/README.md
@@ -42,6 +42,7 @@ Before you begin, ensure you have the following prerequisites:
|[Using middleware with an agent](./Agent_Step14_Middleware/)|This sample demonstrates how to use middleware with an agent|
|[Using plugins with an agent](./Agent_Step15_Plugins/)|This sample demonstrates how to use plugins with an agent|
|[Reducing chat history size](./Agent_Step16_ChatReduction/)|This sample demonstrates how to reduce the chat history to constrain its size, where chat history is maintained locally|
+|[Background responses](./Agent_Step17_BackgroundResponses/)|This sample demonstrates how to use background responses for long-running operations with polling and resumption support|
## Running the samples from the console
diff --git a/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunOptions.cs b/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunOptions.cs
index e845d055d5..c6a64915cf 100644
--- a/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunOptions.cs
+++ b/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunOptions.cs
@@ -10,9 +10,6 @@ namespace Microsoft.Agents.AI;
///
///
///
-/// This class currently has no options, but may be extended in the future to include additional configuration settings.
-///
-///
/// Implementations of may provide subclasses of with additional options specific to that agent type.
///
///
@@ -33,5 +30,48 @@ public AgentRunOptions()
public AgentRunOptions(AgentRunOptions options)
{
_ = Throw.IfNull(options);
+ this.ContinuationToken = options.ContinuationToken;
+ this.AllowBackgroundResponses = options.AllowBackgroundResponses;
}
+
+ ///
+ /// Gets or sets the continuation token for resuming and getting the result of the agent response identified by this token.
+ ///
+ ///
+ /// This property is used for background responses that can be activated via the
+ /// property if the implementation supports them.
+ /// Streamed background responses, such as those returned by default by
+ /// can be resumed if interrupted. This means that a continuation token obtained from the
+ /// of an update just before the interruption occurred can be passed to this property to resume the stream from the point of interruption.
+ /// Non-streamed background responses, such as those returned by ,
+ /// can be polled for completion by obtaining the token from the property
+ /// and passing it via this property on subsequent calls to .
+ ///
+ public object? ContinuationToken { get; set; }
+
+ ///
+ /// Gets or sets a value indicating whether background responses are allowed.
+ ///
+ ///
+ ///
+ /// Background responses allow running long-running operations or tasks asynchronously in the background that can be resumed by streaming APIs
+ /// and polled for completion by non-streaming APIs.
+ ///
+ ///
+ /// When this property is set to true, non-streaming APIs may start a background operation and return an initial
+ /// response with a continuation token. Subsequent calls to the same API should be made in a polling manner with
+ /// the continuation token to get the final result of the operation.
+ ///
+ ///
+ /// When this property is set to true, streaming APIs may also start a background operation and begin streaming
+ /// response updates until the operation is completed. If the streaming connection is interrupted, the
+ /// continuation token obtained from the last update that has one should be supplied to a subsequent call to the same streaming API
+ /// to resume the stream from the point of interruption and continue receiving updates until the operation is completed.
+ ///
+ ///
+ /// This property only takes effect if the implementation it's used with supports background responses.
+ /// If the implementation does not support background responses, this property will be ignored.
+ ///
+ ///
+ public bool? AllowBackgroundResponses { get; set; }
}
diff --git a/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponse.cs b/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponse.cs
index b5714b5fc8..2beb287918 100644
--- a/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponse.cs
+++ b/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponse.cs
@@ -74,6 +74,7 @@ public AgentRunResponse(ChatResponse response)
this.RawRepresentation = response;
this.ResponseId = response.ResponseId;
this.Usage = response.Usage;
+ this.ContinuationToken = response.ContinuationToken;
}
///
@@ -159,6 +160,23 @@ public IList Messages
///
public string? ResponseId { get; set; }
+ ///
+ /// Gets or sets the continuation token for getting the result of a background agent response.
+ ///
+ ///
+ /// implementations that support background responses will return
+ /// a continuation token if background responses are allowed in
+ /// and the result of the response has not been obtained yet. If the response has completed and the result has been obtained,
+ /// the token will be .
+ ///
+ /// This property should be used in conjunction with to
+ /// continue to poll for the completion of the response. Pass this token to
+ /// on subsequent calls to
+ /// to poll for completion.
+ ///
+ ///
+ public object? ContinuationToken { get; set; }
+
///
/// Gets or sets the timestamp indicating when this response was created.
///
@@ -234,7 +252,7 @@ public AgentRunResponseUpdate[] ToAgentRunResponseUpdates()
{
extra = new AgentRunResponseUpdate
{
- AdditionalProperties = this.AdditionalProperties
+ AdditionalProperties = this.AdditionalProperties,
};
if (this.Usage is { } usage)
diff --git a/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponseExtensions.cs b/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponseExtensions.cs
index 01328cdab1..cb3ad7ec74 100644
--- a/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponseExtensions.cs
+++ b/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponseExtensions.cs
@@ -42,6 +42,7 @@ response.RawRepresentation as ChatResponse ??
RawRepresentation = response,
ResponseId = response.ResponseId,
Usage = response.Usage,
+ ContinuationToken = response.ContinuationToken,
};
}
@@ -74,6 +75,7 @@ responseUpdate.RawRepresentation as ChatResponseUpdate ??
RawRepresentation = responseUpdate,
ResponseId = responseUpdate.ResponseId,
Role = responseUpdate.Role,
+ ContinuationToken = responseUpdate.ContinuationToken,
};
}
diff --git a/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponseUpdate.cs b/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponseUpdate.cs
index 347ca8de5d..954893dbcb 100644
--- a/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponseUpdate.cs
+++ b/dotnet/src/Microsoft.Agents.AI.Abstractions/AgentRunResponseUpdate.cs
@@ -78,6 +78,7 @@ public AgentRunResponseUpdate(ChatResponseUpdate chatResponseUpdate)
this.RawRepresentation = chatResponseUpdate;
this.ResponseId = chatResponseUpdate.ResponseId;
this.Role = chatResponseUpdate.Role;
+ this.ContinuationToken = chatResponseUpdate.ContinuationToken;
}
/// Gets or sets the name of the author of the response update.
@@ -148,6 +149,21 @@ public IList Contents
/// Gets or sets a timestamp for the response update.
public DateTimeOffset? CreatedAt { get; set; }
+ ///
+ /// Gets or sets the continuation token for resuming the streamed agent response of which this update is a part.
+ ///
+ ///
+ /// implementations that support background responses will return
+ /// a continuation token on each update if background responses are allowed in
+ /// except for the last update, for which the token will be .
+ ///
+ /// This property should be used for stream resumption, where the continuation token of the latest received update should be
+ /// passed to on subsequent calls to
+ /// to resume streaming from the point of interruption.
+ ///
+ ///
+ public object? ContinuationToken { get; set; }
+
///
public override string ToString() => this.Text;
diff --git a/dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs b/dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs
index edca371f5d..27a9a9ffd3 100644
--- a/dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs
+++ b/dotnet/src/Microsoft.Agents.AI/ChatClient/ChatClientAgent.cs
@@ -436,13 +436,13 @@ await thread.AIContextProvider.InvokedAsync(new(inputMessages) { InvokeException
// If no agent chat options were provided, return the request chat options as is.
if (this._agentOptions?.ChatOptions is null)
{
- return requestChatOptions;
+ return ApplyBackgroundResponsesProperties(requestChatOptions, runOptions);
}
// If no request chat options were provided, use the agent's chat options clone.
if (requestChatOptions is null)
{
- return this._agentOptions?.ChatOptions.Clone();
+ return ApplyBackgroundResponsesProperties(this._agentOptions?.ChatOptions.Clone(), runOptions);
}
// If both are present, we need to merge them.
@@ -532,7 +532,20 @@ await thread.AIContextProvider.InvokedAsync(new(inputMessages) { InvokeException
}
}
- return requestChatOptions;
+ return ApplyBackgroundResponsesProperties(requestChatOptions, runOptions);
+
+ static ChatOptions? ApplyBackgroundResponsesProperties(ChatOptions? chatOptions, AgentRunOptions? agentRunOptions)
+ {
+ // If any of the background response properties are set in the run options, we should apply both to the chat options.
+ if (agentRunOptions?.AllowBackgroundResponses is not null || agentRunOptions?.ContinuationToken is not null)
+ {
+ chatOptions ??= new ChatOptions();
+ chatOptions.AllowBackgroundResponses = agentRunOptions.AllowBackgroundResponses;
+ chatOptions.ContinuationToken = agentRunOptions.ContinuationToken;
+ }
+
+ return chatOptions;
+ }
}
///
@@ -551,49 +564,66 @@ await thread.AIContextProvider.InvokedAsync(new(inputMessages) { InvokeException
{
ChatOptions? chatOptions = this.CreateConfiguredChatOptions(runOptions);
+ // Supplying a thread for background responses is required to prevent an inconsistent experience
+ // for callers if they forget to provide the thread for initial or follow-up runs.
+ if (chatOptions?.AllowBackgroundResponses is true && thread is null)
+ {
+ throw new InvalidOperationException("A thread must be provided when background responses are allowed.");
+ }
+
thread ??= this.GetNewThread();
if (thread is not ChatClientAgentThread typedThread)
{
throw new InvalidOperationException("The provided thread is not compatible with the agent. Only threads created by the agent can be used.");
}
- // Add any existing messages from the thread to the messages to be sent to the chat client.
- List threadMessages = [];
- if (typedThread.MessageStore is not null)
+ // Supplying messages when continuing a background response is not allowed.
+ if (chatOptions?.ContinuationToken is not null && inputMessages.Any())
{
- threadMessages.AddRange(await typedThread.MessageStore.GetMessagesAsync(cancellationToken).ConfigureAwait(false));
+ throw new InvalidOperationException("Input messages are not allowed when continuing a background response using a continuation token.");
}
+ List threadMessages = [];
- // If we have an AIContextProvider, we should get context from it, and update our
- // messages and options with the additional context.
- if (typedThread.AIContextProvider is not null)
+ // Populate the thread messages only if we are not continuing an existing response, since supplying history is not allowed in that case
+ if (chatOptions?.ContinuationToken is null)
{
- var invokingContext = new AIContextProvider.InvokingContext(inputMessages);
- var aiContext = await typedThread.AIContextProvider.InvokingAsync(invokingContext, cancellationToken).ConfigureAwait(false);
- if (aiContext.Messages is { Count: > 0 })
+ // Add any existing messages from the thread to the messages to be sent to the chat client.
+ if (typedThread.MessageStore is not null)
{
- threadMessages.AddRange(aiContext.Messages);
+ threadMessages.AddRange(await typedThread.MessageStore.GetMessagesAsync(cancellationToken).ConfigureAwait(false));
}
- if (aiContext.Tools is { Count: > 0 })
+ // If we have an AIContextProvider, we should get context from it, and update our
+ // messages and options with the additional context.
+ if (typedThread.AIContextProvider is not null)
{
- chatOptions ??= new();
- chatOptions.Tools ??= [];
- foreach (AITool tool in aiContext.Tools)
+ var invokingContext = new AIContextProvider.InvokingContext(inputMessages);
+ var aiContext = await typedThread.AIContextProvider.InvokingAsync(invokingContext, cancellationToken).ConfigureAwait(false);
+ if (aiContext.Messages is { Count: > 0 })
{
- chatOptions.Tools.Add(tool);
+ threadMessages.AddRange(aiContext.Messages);
}
- }
- if (aiContext.Instructions is not null)
- {
- chatOptions ??= new();
- chatOptions.Instructions = string.IsNullOrWhiteSpace(chatOptions.Instructions) ? aiContext.Instructions : $"{chatOptions.Instructions}\n{aiContext.Instructions}";
+ if (aiContext.Tools is { Count: > 0 })
+ {
+ chatOptions ??= new();
+ chatOptions.Tools ??= [];
+ foreach (AITool tool in aiContext.Tools)
+ {
+ chatOptions.Tools.Add(tool);
+ }
+ }
+
+ if (aiContext.Instructions is not null)
+ {
+ chatOptions ??= new();
+ chatOptions.Instructions = string.IsNullOrWhiteSpace(chatOptions.Instructions) ? aiContext.Instructions : $"{chatOptions.Instructions}\n{aiContext.Instructions}";
+ }
}
- }
- // Add the input messages to the end of thread messages.
- threadMessages.AddRange(inputMessages);
+ // Add the input messages to the end of thread messages.
+ threadMessages.AddRange(inputMessages);
+ }
// If a user provided two different thread ids, via the thread object and options, we should throw
// since we don't know which one to use.
diff --git a/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunOptionsTests.cs b/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunOptionsTests.cs
index 097d4bdab7..40901a4969 100644
--- a/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunOptionsTests.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunOptionsTests.cs
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft. All rights reserved.
using System;
+using System.Text.Json;
+using Microsoft.Extensions.AI;
namespace Microsoft.Agents.AI.Abstractions.UnitTests;
@@ -13,15 +15,44 @@ public class AgentRunOptionsTests
public void CloningConstructorCopiesProperties()
{
// Arrange
- var options = new AgentRunOptions();
+ var options = new AgentRunOptions
+ {
+ ContinuationToken = new object(),
+ AllowBackgroundResponses = true
+ };
// Act
var clone = new AgentRunOptions(options);
+
+ // Assert
Assert.NotNull(clone);
+ Assert.Same(options.ContinuationToken, clone.ContinuationToken);
+ Assert.Equal(options.AllowBackgroundResponses, clone.AllowBackgroundResponses);
}
[Fact]
public void CloningConstructorThrowsIfNull() =>
// Act & Assert
Assert.Throws(() => new AgentRunOptions(null!));
+
+ [Fact]
+ public void JsonSerializationRoundtrips()
+ {
+ // Arrange
+ var options = new AgentRunOptions
+ {
+ ContinuationToken = ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }),
+ AllowBackgroundResponses = true
+ };
+
+ // Act
+ string json = JsonSerializer.Serialize(options, AgentAbstractionsJsonUtilities.DefaultOptions);
+
+ var deserialized = JsonSerializer.Deserialize(json, AgentAbstractionsJsonUtilities.DefaultOptions);
+
+ // Assert
+ Assert.NotNull(deserialized);
+ Assert.Equivalent(ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }), deserialized!.ContinuationToken);
+ Assert.Equal(options.AllowBackgroundResponses, deserialized.AllowBackgroundResponses);
+ }
}
diff --git a/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunResponseTests.cs b/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunResponseTests.cs
index 0273d78cba..981f1e3933 100644
--- a/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunResponseTests.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunResponseTests.cs
@@ -19,10 +19,12 @@ public void ConstructorWithNullEmptyArgsIsValid()
response = new();
Assert.Empty(response.Messages);
Assert.Empty(response.Text);
+ Assert.Null(response.ContinuationToken);
response = new((IList?)null);
Assert.Empty(response.Messages);
Assert.Empty(response.Text);
+ Assert.Null(response.ContinuationToken);
Assert.Throws("message", () => new AgentRunResponse((ChatMessage)null!));
}
@@ -55,6 +57,7 @@ public void ConstructorWithChatResponseRoundtrips()
RawRepresentation = new object(),
ResponseId = "responseId",
Usage = new UsageDetails(),
+ ContinuationToken = ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }),
};
AgentRunResponse response = new(chatResponse);
@@ -64,6 +67,7 @@ public void ConstructorWithChatResponseRoundtrips()
Assert.Equal(chatResponse.ResponseId, response.ResponseId);
Assert.Same(chatResponse, response.RawRepresentation as ChatResponse);
Assert.Same(chatResponse.Usage, response.Usage);
+ Assert.Equivalent(ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }), response.ContinuationToken);
}
[Fact]
@@ -97,6 +101,10 @@ public void PropertiesRoundtrip()
AdditionalPropertiesDictionary additionalProps = [];
response.AdditionalProperties = additionalProps;
Assert.Same(additionalProps, response.AdditionalProperties);
+
+ Assert.Null(response.ContinuationToken);
+ response.ContinuationToken = ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 });
+ Assert.Equivalent(ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }), response.ContinuationToken);
}
[Fact]
@@ -110,11 +118,12 @@ public void JsonSerializationRoundtrips()
Usage = new UsageDetails(),
RawRepresentation = new(),
AdditionalProperties = new() { ["key"] = "value" },
+ ContinuationToken = ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }),
};
- string json = JsonSerializer.Serialize(original, TestJsonSerializerContext.Default.AgentRunResponse);
+ string json = JsonSerializer.Serialize(original, AgentAbstractionsJsonUtilities.DefaultOptions);
- AgentRunResponse? result = JsonSerializer.Deserialize(json, TestJsonSerializerContext.Default.AgentRunResponse);
+ AgentRunResponse? result = JsonSerializer.Deserialize(json, AgentAbstractionsJsonUtilities.DefaultOptions);
Assert.NotNull(result);
Assert.Equal(ChatRole.Assistant, result.Messages.Single().Role);
@@ -130,6 +139,7 @@ public void JsonSerializationRoundtrips()
Assert.True(result.AdditionalProperties.TryGetValue("key", out object? value));
Assert.IsType(value);
Assert.Equal("value", ((JsonElement)value!).GetString());
+ Assert.Equivalent(ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }), result.ContinuationToken);
}
[Fact]
diff --git a/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunResponseUpdateTests.cs b/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunResponseUpdateTests.cs
index be382cac20..42d3fdf199 100644
--- a/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunResponseUpdateTests.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.Abstractions.UnitTests/AgentRunResponseUpdateTests.cs
@@ -23,6 +23,7 @@ public void ConstructorPropsDefaulted()
Assert.Null(update.MessageId);
Assert.Null(update.CreatedAt);
Assert.Equal(string.Empty, update.ToString());
+ Assert.Null(update.ContinuationToken);
}
[Fact]
@@ -41,6 +42,7 @@ public void ConstructorWithChatResponseUpdateRoundtrips()
RawRepresentation = new object(),
ResponseId = "responseId",
Role = ChatRole.Assistant,
+ ContinuationToken = new object(),
};
AgentRunResponseUpdate response = new(chatResponseUpdate);
@@ -52,6 +54,7 @@ public void ConstructorWithChatResponseUpdateRoundtrips()
Assert.Same(chatResponseUpdate, response.RawRepresentation as ChatResponseUpdate);
Assert.Equal(chatResponseUpdate.ResponseId, response.ResponseId);
Assert.Equal(chatResponseUpdate.Role, response.Role);
+ Assert.Same(chatResponseUpdate.ContinuationToken, response.ContinuationToken);
}
[Fact]
@@ -102,6 +105,10 @@ public void PropertiesRoundtrip()
Assert.Null(update.CreatedAt);
update.CreatedAt = new DateTimeOffset(2022, 1, 1, 0, 0, 0, TimeSpan.Zero);
Assert.Equal(new DateTimeOffset(2022, 1, 1, 0, 0, 0, TimeSpan.Zero), update.CreatedAt);
+
+ Assert.Null(update.ContinuationToken);
+ update.ContinuationToken = ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 });
+ Assert.Equivalent(ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }), update.ContinuationToken);
}
[Fact]
@@ -152,11 +159,12 @@ public void JsonSerializationRoundtrips()
MessageId = "messageid",
CreatedAt = new DateTimeOffset(2022, 1, 1, 0, 0, 0, TimeSpan.Zero),
AdditionalProperties = new() { ["key"] = "value" },
+ ContinuationToken = ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 })
};
- string json = JsonSerializer.Serialize(original, TestJsonSerializerContext.Default.AgentRunResponseUpdate);
+ string json = JsonSerializer.Serialize(original, AgentAbstractionsJsonUtilities.DefaultOptions);
- AgentRunResponseUpdate? result = JsonSerializer.Deserialize(json, TestJsonSerializerContext.Default.AgentRunResponseUpdate);
+ AgentRunResponseUpdate? result = JsonSerializer.Deserialize(json, AgentAbstractionsJsonUtilities.DefaultOptions);
Assert.NotNull(result);
Assert.Equal(5, result.Contents.Count);
@@ -187,5 +195,8 @@ public void JsonSerializationRoundtrips()
Assert.True(result.AdditionalProperties.TryGetValue("key", out object? value));
Assert.IsType(value);
Assert.Equal("value", ((JsonElement)value!).GetString());
+
+ Assert.NotNull(result.ContinuationToken);
+ Assert.Equivalent(ResponseContinuationToken.FromBytes(new byte[] { 1, 2, 3 }), result.ContinuationToken);
}
}
diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/ChatClientAgentTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/ChatClientAgentTests.cs
index f20f7fe082..9debdd3ec0 100644
--- a/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/ChatClientAgentTests.cs
+++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/ChatClient/ChatClientAgentTests.cs
@@ -1951,6 +1951,499 @@ public void GetNewThreadUsesAIContextProviderFactoryIfProvided()
#endregion
+ #region Background Responses Tests
+
+ [Theory]
+ [InlineData(true)]
+ [InlineData(false)]
+ public async Task RunAsyncPropagatesBackgroundResponsesPropertiesToChatClientAsync(bool providePropsViaChatOptions)
+ {
+ // Arrange
+ object continuationToken = new();
+ ChatOptions? capturedChatOptions = null;
+ Mock mockChatClient = new();
+ mockChatClient
+ .Setup(c => c.GetResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()))
+ .Callback, ChatOptions, CancellationToken>((m, co, ct) => capturedChatOptions = co)
+ .ReturnsAsync(new ChatResponse([new(ChatRole.Assistant, "response")]) { ContinuationToken = null });
+
+ AgentRunOptions agentRunOptions;
+
+ if (providePropsViaChatOptions)
+ {
+ ChatOptions chatOptions = new()
+ {
+ AllowBackgroundResponses = true,
+ ContinuationToken = continuationToken
+ };
+
+ agentRunOptions = new ChatClientAgentRunOptions(chatOptions);
+ }
+ else
+ {
+ agentRunOptions = new AgentRunOptions()
+ {
+ AllowBackgroundResponses = true,
+ ContinuationToken = continuationToken
+ };
+ }
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ ChatClientAgentThread thread = new();
+
+ // Act
+ await agent.RunAsync(thread, options: agentRunOptions);
+
+ // Assert
+ Assert.NotNull(capturedChatOptions);
+ Assert.True(capturedChatOptions.AllowBackgroundResponses);
+ Assert.Same(continuationToken, capturedChatOptions.ContinuationToken);
+ }
+
+ [Fact]
+ public async Task RunAsyncPrioritizesBackgroundResponsesPropertiesFromAgentRunOptionsOverOnesFromChatOptionsAsync()
+ {
+ // Arrange
+ object continuationToken1 = new();
+ object continuationToken2 = new();
+ ChatOptions? capturedChatOptions = null;
+ Mock mockChatClient = new();
+ mockChatClient
+ .Setup(c => c.GetResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()))
+ .Callback, ChatOptions, CancellationToken>((m, co, ct) => capturedChatOptions = co)
+ .ReturnsAsync(new ChatResponse([new(ChatRole.Assistant, "response")]) { ContinuationToken = null });
+
+ ChatOptions chatOptions = new()
+ {
+ AllowBackgroundResponses = true,
+ ContinuationToken = continuationToken1
+ };
+
+ ChatClientAgentRunOptions agentRunOptions = new(chatOptions)
+ {
+ AllowBackgroundResponses = false,
+ ContinuationToken = continuationToken2
+ };
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ // Act
+ await agent.RunAsync(options: agentRunOptions);
+
+ // Assert
+ Assert.NotNull(capturedChatOptions);
+ Assert.False(capturedChatOptions.AllowBackgroundResponses);
+ Assert.Same(continuationToken2, capturedChatOptions.ContinuationToken);
+ }
+
+ [Theory]
+ [InlineData(true)]
+ [InlineData(false)]
+ public async Task RunStreamingAsyncPropagatesBackgroundResponsesPropertiesToChatClientAsync(bool providePropsViaChatOptions)
+ {
+ // Arrange
+ ChatResponseUpdate[] returnUpdates =
+ [
+ new ChatResponseUpdate(role: ChatRole.Assistant, content: "wh"),
+ new ChatResponseUpdate(role: ChatRole.Assistant, content: "at?"),
+ ];
+
+ object continuationToken = new();
+ ChatOptions? capturedChatOptions = null;
+ Mock mockChatClient = new();
+ mockChatClient
+ .Setup(c => c.GetStreamingResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()))
+ .Callback, ChatOptions, CancellationToken>((m, co, ct) => capturedChatOptions = co)
+ .Returns(ToAsyncEnumerableAsync(returnUpdates));
+
+ AgentRunOptions agentRunOptions;
+
+ if (providePropsViaChatOptions)
+ {
+ ChatOptions chatOptions = new()
+ {
+ AllowBackgroundResponses = true,
+ ContinuationToken = continuationToken
+ };
+
+ agentRunOptions = new ChatClientAgentRunOptions(chatOptions);
+ }
+ else
+ {
+ agentRunOptions = new AgentRunOptions()
+ {
+ AllowBackgroundResponses = true,
+ ContinuationToken = continuationToken
+ };
+ }
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ ChatClientAgentThread thread = new();
+
+ // Act
+ await foreach (var _ in agent.RunStreamingAsync(thread, options: agentRunOptions))
+ {
+ }
+
+ // Assert
+ Assert.NotNull(capturedChatOptions);
+
+ Assert.True(capturedChatOptions.AllowBackgroundResponses);
+ Assert.Same(continuationToken, capturedChatOptions.ContinuationToken);
+ }
+
+ [Fact]
+ public async Task RunStreamingAsyncPrioritizesBackgroundResponsesPropertiesFromAgentRunOptionsOverOnesFromChatOptionsAsync()
+ {
+ // Arrange
+ ChatResponseUpdate[] returnUpdates =
+ [
+ new ChatResponseUpdate(role: ChatRole.Assistant, content: "wh"),
+ ];
+
+ object continuationToken1 = new();
+ object continuationToken2 = new();
+ ChatOptions? capturedChatOptions = null;
+ Mock mockChatClient = new();
+ mockChatClient
+ .Setup(c => c.GetStreamingResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()))
+ .Callback, ChatOptions, CancellationToken>((m, co, ct) => capturedChatOptions = co)
+ .Returns(ToAsyncEnumerableAsync(returnUpdates));
+
+ ChatOptions chatOptions = new()
+ {
+ AllowBackgroundResponses = true,
+ ContinuationToken = continuationToken1
+ };
+
+ ChatClientAgentRunOptions agentRunOptions = new(chatOptions)
+ {
+ AllowBackgroundResponses = false,
+ ContinuationToken = continuationToken2
+ };
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ // Act
+ await foreach (var _ in agent.RunStreamingAsync(options: agentRunOptions))
+ {
+ }
+
+ // Assert
+ Assert.NotNull(capturedChatOptions);
+ Assert.False(capturedChatOptions.AllowBackgroundResponses);
+ Assert.Same(continuationToken2, capturedChatOptions.ContinuationToken);
+ }
+
+ [Fact]
+ public async Task RunAsyncPropagatesContinuationTokenFromChatResponseToAgentRunResponseAsync()
+ {
+ // Arrange
+ object continuationToken = new();
+ Mock mockChatClient = new();
+ mockChatClient
+ .Setup(c => c.GetResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()))
+ .ReturnsAsync(new ChatResponse([new(ChatRole.Assistant, "partial")]) { ContinuationToken = continuationToken });
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+ var runOptions = new ChatClientAgentRunOptions(new ChatOptions { AllowBackgroundResponses = true });
+
+ ChatClientAgentThread thread = new();
+
+ // Act
+ var response = await agent.RunAsync([new(ChatRole.User, "hi")], thread, options: runOptions);
+
+ // Assert
+ Assert.Same(continuationToken, response.ContinuationToken);
+ }
+
+ [Fact]
+ public async Task RunStreamingAsyncPropagatesContinuationTokensFromUpdatesAsync()
+ {
+ // Arrange
+ object token1 = new();
+ ChatResponseUpdate[] expectedUpdates =
+ [
+ new ChatResponseUpdate(ChatRole.Assistant, "pa") { ContinuationToken = token1 },
+ new ChatResponseUpdate(ChatRole.Assistant, "rt") { ContinuationToken = null } // terminal
+ ];
+
+ Mock mockChatClient = new();
+ mockChatClient
+ .Setup(c => c.GetStreamingResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()))
+ .Returns(ToAsyncEnumerableAsync(expectedUpdates));
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ ChatClientAgentThread thread = new();
+
+ // Act
+ var actualUpdates = new List();
+ await foreach (var u in agent.RunStreamingAsync([new(ChatRole.User, "hi")], thread, options: new ChatClientAgentRunOptions(new ChatOptions { AllowBackgroundResponses = true })))
+ {
+ actualUpdates.Add(u);
+ }
+
+ // Assert
+ Assert.Equal(2, actualUpdates.Count);
+ Assert.Same(token1, actualUpdates[0].ContinuationToken);
+ Assert.Null(actualUpdates[1].ContinuationToken); // last update has null token
+ }
+
+ [Fact]
+ public async Task RunAsyncThrowsWhenMessagesProvidedWithContinuationTokenAsync()
+ {
+ // Arrange
+ Mock mockChatClient = new();
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ AgentRunOptions runOptions = new() { ContinuationToken = new() };
+
+ IEnumerable inputMessages = [new ChatMessage(ChatRole.User, "test message")];
+
+ // Act & Assert
+ await Assert.ThrowsAsync(() => agent.RunAsync(inputMessages, options: runOptions));
+
+ // Verify that the IChatClient was never called due to early validation
+ mockChatClient.Verify(
+ c => c.GetResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()),
+ Times.Never);
+ }
+
+ [Fact]
+ public async Task RunStreamingAsyncThrowsWhenMessagesProvidedWithContinuationTokenAsync()
+ {
+ // Arrange
+ Mock mockChatClient = new();
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ AgentRunOptions runOptions = new() { ContinuationToken = new() };
+
+ IEnumerable inputMessages = [new ChatMessage(ChatRole.User, "test message")];
+
+ // Act & Assert
+ await Assert.ThrowsAsync(async () =>
+ {
+ await foreach (var update in agent.RunStreamingAsync(inputMessages, options: runOptions))
+ {
+ // Should not reach here
+ }
+ });
+
+ // Verify that the IChatClient was never called due to early validation
+ mockChatClient.Verify(
+ c => c.GetStreamingResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()),
+ Times.Never);
+ }
+
+ [Fact]
+ public async Task RunAsyncSkipsThreadMessagePopulationWithContinuationTokenAsync()
+ {
+ // Arrange
+ List capturedMessages = [];
+
+ // Create a mock message store that would normally provide messages
+ var mockMessageStore = new Mock();
+ mockMessageStore
+ .Setup(ms => ms.GetMessagesAsync(It.IsAny()))
+ .ReturnsAsync([new(ChatRole.User, "Message from message store")]);
+
+ // Create a mock AI context provider that would normally provide context
+ var mockContextProvider = new Mock();
+ mockContextProvider
+ .Setup(p => p.InvokingAsync(It.IsAny(), It.IsAny()))
+ .ReturnsAsync(new AIContext
+ {
+ Messages = [new(ChatRole.System, "Message from AI context")],
+ Instructions = "context instructions"
+ });
+
+ Mock mockChatClient = new();
+ mockChatClient
+ .Setup(c => c.GetResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()))
+ .Callback, ChatOptions, CancellationToken>((msgs, opts, ct) =>
+ capturedMessages.AddRange(msgs))
+ .ReturnsAsync(new ChatResponse([new(ChatRole.Assistant, "continued response")]));
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ // Create a thread with both message store and AI context provider
+ ChatClientAgentThread thread = new()
+ {
+ MessageStore = mockMessageStore.Object,
+ AIContextProvider = mockContextProvider.Object
+ };
+
+ AgentRunOptions runOptions = new() { ContinuationToken = new() };
+
+ // Act
+ await agent.RunAsync([], thread, options: runOptions);
+
+ // Assert
+
+ // With continuation token, thread message population should be skipped
+ Assert.Empty(capturedMessages);
+
+ // Verify that message store was never called due to continuation token
+ mockMessageStore.Verify(
+ ms => ms.GetMessagesAsync(It.IsAny()),
+ Times.Never);
+
+ // Verify that AI context provider was never called due to continuation token
+ mockContextProvider.Verify(
+ p => p.InvokingAsync(It.IsAny(), It.IsAny()),
+ Times.Never);
+ }
+
+ [Fact]
+ public async Task RunStreamingAsyncSkipsThreadMessagePopulationWithContinuationTokenAsync()
+ {
+ // Arrange
+ List capturedMessages = [];
+
+ // Create a mock message store that would normally provide messages
+ var mockMessageStore = new Mock();
+ mockMessageStore
+ .Setup(ms => ms.GetMessagesAsync(It.IsAny()))
+ .ReturnsAsync([new(ChatRole.User, "Message from message store")]);
+
+ // Create a mock AI context provider that would normally provide context
+ var mockContextProvider = new Mock();
+ mockContextProvider
+ .Setup(p => p.InvokingAsync(It.IsAny(), It.IsAny()))
+ .ReturnsAsync(new AIContext
+ {
+ Messages = [new(ChatRole.System, "Message from AI context")],
+ Instructions = "context instructions"
+ });
+
+ Mock mockChatClient = new();
+ mockChatClient
+ .Setup(c => c.GetStreamingResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()))
+ .Callback, ChatOptions, CancellationToken>((msgs, opts, ct) =>
+ capturedMessages.AddRange(msgs))
+ .Returns(ToAsyncEnumerableAsync([new ChatResponseUpdate(role: ChatRole.Assistant, content: "continued response")]));
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ // Create a thread with both message store and AI context provider
+ ChatClientAgentThread thread = new()
+ {
+ MessageStore = mockMessageStore.Object,
+ AIContextProvider = mockContextProvider.Object
+ };
+
+ AgentRunOptions runOptions = new() { ContinuationToken = new() };
+
+ // Act
+ await agent.RunStreamingAsync([], thread, options: runOptions).ToListAsync();
+
+ // Assert
+
+ // With continuation token, thread message population should be skipped
+ Assert.Empty(capturedMessages);
+
+ // Verify that message store was never called due to continuation token
+ mockMessageStore.Verify(
+ ms => ms.GetMessagesAsync(It.IsAny()),
+ Times.Never);
+
+ // Verify that AI context provider was never called due to continuation token
+ mockContextProvider.Verify(
+ p => p.InvokingAsync(It.IsAny(), It.IsAny()),
+ Times.Never);
+ }
+
+ [Fact]
+ public async Task RunAsyncThrowsWhenNoThreadProvidedForBackgroundResponsesAsync()
+ {
+ // Arrange
+ Mock mockChatClient = new();
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ AgentRunOptions runOptions = new() { AllowBackgroundResponses = true };
+
+ IEnumerable inputMessages = [new ChatMessage(ChatRole.User, "test message")];
+
+ // Act & Assert
+ await Assert.ThrowsAsync(() => agent.RunAsync(inputMessages, options: runOptions));
+
+ // Verify that the IChatClient was never called due to early validation
+ mockChatClient.Verify(
+ c => c.GetResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()),
+ Times.Never);
+ }
+
+ [Fact]
+ public async Task RunStreamingAsyncThrowsWhenNoThreadProvidedForBackgroundResponsesAsync()
+ {
+ // Arrange
+ Mock mockChatClient = new();
+
+ ChatClientAgent agent = new(mockChatClient.Object);
+
+ AgentRunOptions runOptions = new() { AllowBackgroundResponses = true };
+
+ IEnumerable inputMessages = [new ChatMessage(ChatRole.User, "test message")];
+
+ // Act & Assert
+ await Assert.ThrowsAsync(async () =>
+ {
+ await foreach (var update in agent.RunStreamingAsync(inputMessages, options: runOptions))
+ {
+ // Should not reach here
+ }
+ });
+
+ // Verify that the IChatClient was never called due to early validation
+ mockChatClient.Verify(
+ c => c.GetStreamingResponseAsync(
+ It.IsAny>(),
+ It.IsAny(),
+ It.IsAny()),
+ Times.Never);
+ }
+
+ #endregion
+
private static async IAsyncEnumerable ToAsyncEnumerableAsync(IEnumerable values)
{
await Task.Yield();