microsoft · moonbox3 · Apr 29, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/.gitignore b/.gitignore
@@ -242,3 +242,7 @@ python/dotnet-ref
 # Generated filtered solution files (created by eng/scripts/New-FilteredSolution.ps1)
 dotnet/filtered-*.slnx
 **/*.lscache
+
+# Local tool state
+.omc/
+.omx/
diff --git a/python/packages/core/agent_framework/_workflows/_agent.py b/python/packages/core/agent_framework/_workflows/_agent.py
@@ -528,6 +528,7 @@ def _convert_workflow_events_to_agent_response(
                 raw_representations.append(output_event)
             else:
                 data = output_event.data
+
                 if isinstance(data, AgentResponseUpdate):
                     # We cannot support AgentResponseUpdate in non-streaming mode. This is because the message
                     # sequence cannot be guaranteed when there are streaming updates in between non-streaming
@@ -628,16 +629,23 @@ def _convert_workflow_event_to_agent_response_updates(
             A list of AgentResponseUpdate objects. Empty list if the event is not relevant.
         """
         if event.type == "output":
-            # Convert workflow output to agent response updates.
-            # Handle different data types appropriately.
             data = event.data
             executor_id = event.executor_id
 
             if isinstance(data, AgentResponseUpdate):
-                # Pass through AgentResponseUpdate directly (streaming from AgentExecutor)
-                if not data.author_name:
-                    data.author_name = executor_id
-                return [data]
+                # Construct a fresh AgentResponseUpdate so we don't mutate a payload
+                # that AgentExecutor still holds a reference to in its `updates` list.
+                return [
+                    AgentResponseUpdate(
+                        contents=list(data.contents),
+                        role=data.role,
+                        author_name=data.author_name or executor_id,
+                        response_id=data.response_id,
+                        message_id=data.message_id,
+                        created_at=data.created_at,
+                        raw_representation=data.raw_representation,
+                    )
+                ]
             if isinstance(data, AgentResponse):
                 # Convert each message in AgentResponse to an AgentResponseUpdate
                 updates: list[AgentResponseUpdate] = []

diff --git a/python/packages/core/agent_framework/_workflows/_agent_executor.py b/python/packages/core/agent_framework/_workflows/_agent_executor.py
@@ -156,8 +156,9 @@ def __init__(
                    the agent run.
                 - "custom": use the provided context_filter function to determine which messages to include
                    as context for the agent run.
-            context_filter: An optional function for filtering conversation context when context_mode is set
-                to "custom".
+            context_filter: A function that takes the full conversation (list of Messages) as input and returns
+                a filtered list of Messages to be used as context for the agent run. This is required
+                if context_mode is set to "custom".
         """
         # Prefer provided id; else use agent.name if present; else generate deterministic prefix
         exec_id = id or resolve_agent_id(agent)

diff --git a/python/packages/core/agent_framework/_workflows/_workflow_executor.py b/python/packages/core/agent_framework/_workflows/_workflow_executor.py
@@ -361,7 +361,7 @@ def can_handle(self, message: WorkflowMessage) -> bool:
         return any(is_instance_of(message.data, input_type) for input_type in self.workflow.input_types)
 
     @handler
-    async def process_workflow(self, input_data: object, ctx: WorkflowContext[Any]) -> None:
+    async def process_workflow(self, input_data: object, ctx: WorkflowContext[Any, Any]) -> None:
         """Execute the sub-workflow with raw input data.
 
         This handler starts a new sub-workflow execution. When the sub-workflow
@@ -428,7 +428,7 @@ async def process_workflow(self, input_data: object, ctx: WorkflowContext[Any])
     async def handle_message_wrapped_request_response(
         self,
         response: SubWorkflowResponseMessage,
-        ctx: WorkflowContext[Any],
+        ctx: WorkflowContext[Any, Any],
     ) -> None:
         """Handle response from parent for a forwarded request.
 

diff --git a/python/packages/core/tests/workflow/test_workflow_kwargs.py b/python/packages/core/tests/workflow/test_workflow_kwargs.py
@@ -232,16 +232,18 @@ def simple_selector(state: GroupChatState) -> str:
 
 async def test_kwargs_stored_in_state() -> None:
     """Test that function_invocation_kwargs are stored in State with the correct key."""
-    from agent_framework import Executor, WorkflowContext, handler
+    from typing_extensions import Never
+
+    from agent_framework import AgentResponse, Executor, WorkflowContext, handler
 
     stored_kwargs: dict[str, Any] | None = None
 
     class _StateInspector(Executor):
         @handler
-        async def inspect(self, msgs: list[Message], ctx: WorkflowContext[list[Message]]) -> None:
+        async def inspect(self, msgs: list[Message], ctx: WorkflowContext[Never, AgentResponse]) -> None:
             nonlocal stored_kwargs
             stored_kwargs = ctx.get_state(WORKFLOW_RUN_KWARGS_KEY)
-            await ctx.send_message(msgs)
+            await ctx.yield_output(AgentResponse(messages=msgs))
 
     inspector = _StateInspector(id="inspector")
     workflow = SequentialBuilder(participants=[inspector]).build()
@@ -256,16 +258,18 @@ async def inspect(self, msgs: list[Message], ctx: WorkflowContext[list[Message]]
 
 async def test_empty_kwargs_stored_as_empty_dict() -> None:
     """Test that empty kwargs are stored as empty dict in State."""
-    from agent_framework import Executor, WorkflowContext, handler
+    from typing_extensions import Never
+
+    from agent_framework import AgentResponse, Executor, WorkflowContext, handler
 
     stored_kwargs: Any = "NOT_CHECKED"
 
     class _StateChecker(Executor):
         @handler
-        async def check(self, msgs: list[Message], ctx: WorkflowContext[list[Message]]) -> None:
+        async def check(self, msgs: list[Message], ctx: WorkflowContext[Never, AgentResponse]) -> None:
             nonlocal stored_kwargs
             stored_kwargs = ctx.get_state(WORKFLOW_RUN_KWARGS_KEY)
-            await ctx.send_message(msgs)
+            await ctx.yield_output(AgentResponse(messages=msgs))
 
     checker = _StateChecker(id="checker")
     workflow = SequentialBuilder(participants=[checker]).build()
@@ -695,7 +699,9 @@ async def test_subworkflow_kwargs_accessible_via_state() -> None:
     Verifies that WORKFLOW_RUN_KWARGS_KEY is populated in the subworkflow's State
     with kwargs from the parent workflow.
     """
-    from agent_framework import Executor, WorkflowContext, handler
+    from typing_extensions import Never
+
+    from agent_framework import AgentResponse, Executor, WorkflowContext, handler
     from agent_framework._workflows._workflow_executor import WorkflowExecutor
 
     captured_kwargs_from_state: list[dict[str, Any]] = []
@@ -704,10 +710,10 @@ class _StateReader(Executor):
         """Executor that reads kwargs from State for verification."""
 
         @handler
-        async def read_kwargs(self, msgs: list[Message], ctx: WorkflowContext[list[Message]]) -> None:
+        async def read_kwargs(self, msgs: list[Message], ctx: WorkflowContext[Never, AgentResponse]) -> None:
             kwargs_from_state = ctx.get_state(WORKFLOW_RUN_KWARGS_KEY)
             captured_kwargs_from_state.append(kwargs_from_state or {})
-            await ctx.send_message(msgs)
+            await ctx.yield_output(AgentResponse(messages=msgs))
 
     # Build inner workflow with State reader
     state_reader = _StateReader(id="state_reader")

diff --git a/python/packages/foundry/tests/foundry/test_foundry_embedding_client.py b/python/packages/foundry/tests/foundry/test_foundry_embedding_client.py
@@ -303,6 +303,7 @@ def _foundry_integration_tests_enabled() -> bool:
 class TestFoundryEmbeddingIntegration:
     """Integration tests requiring a live Foundry inference endpoint."""
 
+    @pytest.mark.skip(reason="Flaky in merge queue, blocking unrelated PRs. Tracked in #5553.")
     @pytest.mark.flaky
     @pytest.mark.integration
     @skip_if_foundry_inference_integration_tests_disabled

diff --git a/python/packages/foundry_hosting/tests/test_responses_int.py b/python/packages/foundry_hosting/tests/test_responses_int.py
@@ -559,6 +559,7 @@ async def test_tool_call_streaming(self, server_with_tools: ResponsesHostServer)
 class TestOptions:
     """Verify chat options are passed through to the model."""
 
+    @pytest.mark.skip(reason="Flaky in merge queue, blocking unrelated PRs. Tracked in #5553.")
     @pytest.mark.flaky
     @pytest.mark.integration
     @skip_if_foundry_hosting_integration_tests_disabled

diff --git a/...n/packages/orchestrations/agent_framework_orchestrations/_base_group_chat_orchestrator.py b/...n/packages/orchestrations/agent_framework_orchestrations/_base_group_chat_orchestrator.py
@@ -12,7 +12,7 @@
 from dataclasses import dataclass
 from typing import Any, ClassVar, TypeAlias
 
-from agent_framework._types import Message
+from agent_framework._types import AgentResponse, AgentResponseUpdate, Message
 from agent_framework._workflows._agent_executor import AgentExecutor, AgentExecutorRequest, AgentExecutorResponse
 from agent_framework._workflows._events import WorkflowEvent
 from agent_framework._workflows._executor import Executor, handler
@@ -351,8 +351,10 @@ async def _check_termination(self) -> bool:
             result = await result
         return result
 
-    async def _check_terminate_and_yield(self, ctx: WorkflowContext[Never, list[Message]]) -> bool:
-        """Check termination conditions and yield completion if met.
+    async def _check_terminate_and_yield(
+        self, ctx: WorkflowContext[Never, AgentResponse | AgentResponseUpdate]
+    ) -> bool:
+        """Check termination conditions and yield the completion message if met.
 
         Args:
             ctx: Workflow context for yielding output
@@ -362,12 +364,37 @@ async def _check_terminate_and_yield(self, ctx: WorkflowContext[Never, list[Mess
         """
         terminate = await self._check_termination()
         if terminate:
-            self._append_messages([self._create_completion_message(self.TERMINATION_CONDITION_MET_MESSAGE)])
-            await ctx.yield_output(self._full_conversation)
+            completion_message = self._create_completion_message(self.TERMINATION_CONDITION_MET_MESSAGE)
+            self._append_messages([completion_message])
+            await self._yield_completion(ctx, completion_message)
             return True
 
         return False
 
+    async def _yield_completion(
+        self,
+        ctx: WorkflowContext[Never, AgentResponse | AgentResponseUpdate],
+        completion_message: Message,
+    ) -> None:
+        """Yield a synthesized terminal completion message in the right shape for the run mode.
+
+        Mode-aware to mirror ``AgentExecutor`` semantics:
+        - Streaming (``ctx.is_streaming()``): yield a single ``AgentResponseUpdate`` so the
+          ``output`` event stream stays uniformly per-chunk.
+        - Non-streaming: yield an ``AgentResponse`` containing only the completion message.
+        """
+        if ctx.is_streaming():
+            await ctx.yield_output(
+                AgentResponseUpdate(
+                    contents=list(completion_message.contents),
+                    role=completion_message.role,
+                    author_name=completion_message.author_name,
+                    message_id=completion_message.message_id,
+                )
+            )
+        else:
+            await ctx.yield_output(AgentResponse(messages=[completion_message]))
+
     def _create_completion_message(self, message: str) -> Message:
         """Create a standardized completion message.
 
@@ -490,8 +517,10 @@ def _check_round_limit(self) -> bool:
 
         return False
 
-    async def _check_round_limit_and_yield(self, ctx: WorkflowContext[Never, list[Message]]) -> bool:
-        """Check round limit and yield completion if reached.
+    async def _check_round_limit_and_yield(
+        self, ctx: WorkflowContext[Never, AgentResponse | AgentResponseUpdate]
+    ) -> bool:
+        """Check round limit and yield the max-rounds completion message if reached.
 
         Args:
             ctx: Workflow context for yielding output
@@ -501,8 +530,9 @@ async def _check_round_limit_and_yield(self, ctx: WorkflowContext[Never, list[Me
         """
         reach_max_rounds = self._check_round_limit()
         if reach_max_rounds:
-            self._append_messages([self._create_completion_message(self.MAX_ROUNDS_MET_MESSAGE)])
-            await ctx.yield_output(self._full_conversation)
+            completion_message = self._create_completion_message(self.MAX_ROUNDS_MET_MESSAGE)
+            self._append_messages([completion_message])
+            await self._yield_completion(ctx, completion_message)
             return True
 
         return False

diff --git a/python/packages/orchestrations/agent_framework_orchestrations/_concurrent.py b/python/packages/orchestrations/agent_framework_orchestrations/_concurrent.py
@@ -6,7 +6,7 @@
 from collections.abc import Callable, Sequence
 from typing import Any
 
-from agent_framework import Message, SupportsAgentRun
+from agent_framework import AgentResponse, Message, SupportsAgentRun
 from agent_framework._workflows._agent_executor import AgentExecutor, AgentExecutorRequest, AgentExecutorResponse
 from agent_framework._workflows._agent_utils import resolve_agent_id
 from agent_framework._workflows._checkpoint import CheckpointStorage
@@ -71,18 +71,20 @@ async def from_messages(
 
 
 class _AggregateAgentConversations(Executor):
-    """Aggregates agent responses and completes with combined ChatMessages.
+    """Aggregates agent responses and completes with a single AgentResponse.
 
-    Emits a list[Message] shaped as:
-      [ single_user_prompt?, agent1_final_assistant, agent2_final_assistant, ... ]
+    Emits an `AgentResponse` whose `messages` are the final assistant message from each
+    participant (one message per agent), in deterministic participant order matching
+    the fan-in `sources` configuration. The user prompt is intentionally not included —
+    that is part of the input, not the answer.
 
-    - Extracts a single user prompt (first user message seen across results).
-    - For each result, selects the final assistant message (prefers agent_response.messages).
-    - Avoids duplicating the same user message per agent.
+    For each participant the final assistant message is sourced from
+    `r.agent_response.messages`, falling back to scanning `r.full_conversation` for
+    pathological executors that did not populate the response.
     """
 
     @handler
-    async def aggregate(self, results: list[AgentExecutorResponse], ctx: WorkflowContext[Never, list[Message]]) -> None:
+    async def aggregate(self, results: list[AgentExecutorResponse], ctx: WorkflowContext[Never, AgentResponse]) -> None:
         if not results:
             logger.error("Concurrent aggregator received empty results list")
             raise ValueError("Aggregation failed: no results provided")
@@ -91,12 +93,10 @@ def _is_role(msg: Any, role: str) -> bool:
             r = getattr(msg, "role", None)
             if r is None:
                 return False
-            # Normalize both r and role to lowercase strings for comparison
             r_str = str(r).lower() if isinstance(r, str) or hasattr(r, "__str__") else r
             role_str = str(role).lower()
             return r_str == role_str
 
-        prompt_message: Message | None = None
         assistant_replies: list[Message] = []
 
         for r in results:
@@ -107,10 +107,6 @@ def _is_role(msg: Any, role: str) -> bool:
                 f"{len(resp_messages)} response msgs, {len(r.full_conversation)} conversation msgs"
             )
 
-            # Capture a single user prompt (first encountered across any conversation)
-            if prompt_message is None:
-                prompt_message = next((m for m in r.full_conversation if _is_role(m, "user")), None)
-
             # Pick the final assistant message from the response; fallback to conversation search
             final_assistant = next((m for m in reversed(resp_messages) if _is_role(m, "assistant")), None)
             if final_assistant is None:
@@ -127,14 +123,7 @@ def _is_role(msg: Any, role: str) -> bool:
             logger.error(f"Aggregation failed: no assistant replies found across {len(results)} results")
             raise RuntimeError("Aggregation failed: no assistant replies found")
 
-        output: list[Message] = []
-        if prompt_message is not None:
-            output.append(prompt_message)
-        else:
-            logger.warning("No user prompt found in any conversation; emitting assistants only")
-        output.extend(assistant_replies)
-
-        await ctx.yield_output(output)
+        await ctx.yield_output(AgentResponse(messages=assistant_replies))
 
 
 class _CallbackAggregator(Executor):
@@ -190,7 +179,8 @@ class ConcurrentBuilder:
 
         from agent_framework_orchestrations import ConcurrentBuilder
 
-        # Minimal: use default aggregator (returns list[Message])
+        # Minimal: use default aggregator (yields one AgentResponse with one assistant
+        # message per participant)
         workflow = ConcurrentBuilder(participants=[agent1, agent2, agent3]).build()
 
 
@@ -222,8 +212,9 @@ def __init__(
         Args:
             participants: Sequence of agent or executor instances to run in parallel.
             checkpoint_storage: Optional checkpoint storage for enabling workflow state persistence.
-            intermediate_outputs: If True, enables intermediate outputs from agent participants
-                before aggregation.
+            intermediate_outputs: If True, every participant's `yield_output` surfaces as a
+                workflow `output` event in addition to the aggregator's. By default
+                (False) only the aggregator's output surfaces.
         """
         self._participants: list[SupportsAgentRun | Executor] = []
         self._aggregator: Executor | None = None
@@ -383,7 +374,7 @@ def build(self) -> Workflow:
         - If request info is enabled, the orchestration emits a request info event with outputs from all participants
             before sending the outputs to the aggregator
         - Aggregator yields output and the workflow becomes idle. The output is either:
-          - list[Message] (default aggregator: one user + one assistant per agent)
+          - AgentResponse (default aggregator: one assistant message per participant)
           - custom payload from the provided aggregator
 
         Returns: