Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c5e4e51
fix(event_loop): raise dedicated exception when encountering max toke…
dbschmigelski Jul 30, 2025
6703819
fix: update integ tests
dbschmigelski Jul 30, 2025
c94b74e
fix: rename exception message, add to exception, move earlier in cycle
dbschmigelski Jul 31, 2025
36dd0f9
Update tests_integ/test_max_tokens_reached.py
dbschmigelski Jul 31, 2025
e04c73d
Update tests_integ/test_max_tokens_reached.py
dbschmigelski Jul 31, 2025
cca2f86
linting
dbschmigelski Jul 31, 2025
f647baa
Merge branch 'strands-agents:main' into fix-max-tokens
dbschmigelski Jul 31, 2025
78c5a91
Merge branch 'strands-agents:main' into fix-max-tokens
dbschmigelski Aug 1, 2025
a208496
Merge branch 'strands-agents:main' into fix-max-tokens
dbschmigelski Aug 4, 2025
2e2d4df
feat: add builtin hook provider to address max tokens reached truncation
dbschmigelski Aug 4, 2025
447d147
tests: modify integ test to inspect message history
dbschmigelski Aug 4, 2025
564895d
fix: fix linting errors
dbschmigelski Aug 4, 2025
2f118fb
fix: linting
dbschmigelski Aug 4, 2025
e5fc51a
refactor: switch from hook approach to conversation manager
dbschmigelski Aug 5, 2025
5906fc2
linting
dbschmigelski Aug 5, 2025
87445a3
fix: test contained incorrect assertions
dbschmigelski Aug 6, 2025
924fea9
fix: add event emission
dbschmigelski Aug 6, 2025
104f6b4
feat: move to async
dbschmigelski Aug 6, 2025
11b91f4
feat: add additional error case where no tool uses were fixed
dbschmigelski Aug 6, 2025
1da9ba7
feat: add max tokens reached test
dbschmigelski Aug 6, 2025
623f3c7
linting
dbschmigelski Aug 6, 2025
66c4c07
feat: add max tokens reached test
dbschmigelski Aug 6, 2025
4b5c5a7
feat: switch to a default behavior to recover from max tokens reached
dbschmigelski Aug 7, 2025
83ad822
fix: all tool uses now must be replaced
dbschmigelski Aug 8, 2025
faa4618
fix: boolean
dbschmigelski Aug 8, 2025
fa8195f
remove todo
dbschmigelski Aug 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions src/strands/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from ..tools.registry import ToolRegistry
from ..tools.watcher import ToolWatcher
from ..types.content import ContentBlock, Message, Messages
from ..types.exceptions import ContextWindowOverflowException
from ..types.exceptions import ContextWindowOverflowException, MaxTokensReachedException
from ..types.tools import ToolResult, ToolUse
from ..types.traces import AttributeValue
from .agent_result import AgentResult
Expand Down Expand Up @@ -582,18 +582,21 @@ async def _execute_event_loop_cycle(self, invocation_state: dict[str, Any]) -> A
)
async for event in events:
yield event

return
except ContextWindowOverflowException as e:
# Try reducing the context size and retrying
self.conversation_manager.reduce_context(self, e=e)
self.conversation_manager.reduce_context(agent=self, e=e)
except MaxTokensReachedException as e:
# Recover conversation state after token limit exceeded, then continue with next cycle
await self.conversation_manager.handle_token_limit_reached(agent=self, e=e)

# Sync agent after reduce_context to keep conversation_manager_state up to date in the session
if self._session_manager:
self._session_manager.sync_agent(self)
# Sync agent after handling exception to keep conversation_manager_state up to date in the session
if self._session_manager:
self._session_manager.sync_agent(self)

events = self._execute_event_loop_cycle(invocation_state)
async for event in events:
yield event
events = self._execute_event_loop_cycle(invocation_state)
async for event in events:
yield event

def _record_tool_execution(
self,
Expand Down
2 changes: 2 additions & 0 deletions src/strands/agent/conversation_manager/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@

from .conversation_manager import ConversationManager
from .null_conversation_manager import NullConversationManager
from .recover_tool_use_on_max_tokens_reached import recover_tool_use_on_max_tokens_reached
from .sliding_window_conversation_manager import SlidingWindowConversationManager
from .summarizing_conversation_manager import SummarizingConversationManager

# Names exported by `from <package> import *`: the conversation manager classes plus the
# standalone `recover_tool_use_on_max_tokens_reached` helper imported above.
__all__ = [
"ConversationManager",
"NullConversationManager",
"recover_tool_use_on_max_tokens_reached",
"SlidingWindowConversationManager",
"SummarizingConversationManager",
]
15 changes: 15 additions & 0 deletions src/strands/agent/conversation_manager/conversation_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import TYPE_CHECKING, Any, Optional

from ...types.content import Message
from ...types.exceptions import MaxTokensReachedException

if TYPE_CHECKING:
from ...agent.agent import Agent
Expand Down Expand Up @@ -86,3 +87,17 @@ def reduce_context(self, agent: "Agent", e: Optional[Exception] = None, **kwargs
**kwargs: Additional keyword arguments for future extensibility.
"""
pass

async def handle_token_limit_reached(self, agent: "Agent", e: MaxTokensReachedException, **kwargs: Any) -> None:
    """Called when MaxTokensReachedException is thrown to recover conversation state.

    This method should implement recovery strategies when the token limit is exceeded and the message array
    may be in a broken state. The implementation here performs no recovery: it re-raises ``e`` unchanged.

    Args:
        agent: The agent whose conversation state will be recovered.
        e: The MaxTokensReachedException that triggered the recovery.
        **kwargs: Additional keyword arguments for future extensibility.

    Raises:
        MaxTokensReachedException: Always; the exception passed in as ``e`` is re-raised.
    """
    # No recovery strategy at this level: hand the original exception back to the caller.
    raise e
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Shared utility for handling token limit recovery in conversation managers."""

import logging
from typing import TYPE_CHECKING

from ...hooks import MessageAddedEvent
from ...types.content import ContentBlock, Message
from ...types.exceptions import MaxTokensReachedException
from ...types.tools import ToolUse

if TYPE_CHECKING:
from ...agent.agent import Agent

logger = logging.getLogger(__name__)


async def recover_tool_use_on_max_tokens_reached(agent: "Agent", exception: MaxTokensReachedException) -> None:
    """Repair a response that was cut off by the model's token limit and record it on the agent.

    The truncated message carried by ``exception`` is walked block by block:

    * blocks without a ``toolUse`` entry are kept as they are;
    * a ``toolUse`` whose ``name``, ``input`` or ``toolUseId`` is missing or empty is swapped for a
      text block explaining that the tool call was cut short;
    * a ``toolUse`` with all three fields populated is not something this function can correct, so
      the original exception is re-raised and nothing is changed.

    On success the rebuilt message is appended to ``agent.messages`` and a ``MessageAddedEvent``
    is dispatched through ``agent.hooks``.

    Args:
        agent: The agent whose conversation will be updated with the corrected message.
        exception: The MaxTokensReachedException containing the incomplete message.

    Raises:
        MaxTokensReachedException: ``exception`` itself, when the incomplete message has no content
            or contains a tool use that has all required fields.
    """
    logger.info("handling MaxTokensReachedException - inspecting incomplete message for invalid tool uses")

    truncated: Message = exception.incomplete_message
    blocks = truncated["content"]

    if not blocks:
        # Cannot correct invalid content block if content is empty
        raise exception

    repaired: list[ContentBlock] = []
    for block in blocks:
        tool_use = block.get("toolUse")
        if not tool_use:
            # Plain (non tool-use) content is carried over untouched.
            repaired.append(block)
            continue

        tool_name = tool_use.get("name")
        if tool_name and tool_use.get("input") and tool_use.get("toolUseId"):
            # Every required field is populated, so truncation is not the evident cause.
            # Cannot correct; bail out before the agent's history is modified.
            raise exception

        # Tool use is incomplete due to max_tokens truncation: substitute an explanatory text block.
        display_name = tool_name or "<unknown>"
        logger.warning("tool_name=<%s> | replacing with error message due to max_tokens truncation.", display_name)
        replacement: ContentBlock = {
            "text": f"The selected tool {display_name}'s tool use was incomplete due "
            f"to maximum token limits being reached."
        }
        repaired.append(replacement)

    corrected: Message = {"content": repaired, "role": truncated["role"]}
    agent.messages.append(corrected)
    agent.hooks.invoke_callbacks(MessageAddedEvent(agent=agent, message=corrected))
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from ...agent.agent import Agent

from ...types.content import Messages
from ...types.exceptions import ContextWindowOverflowException
from ...types.exceptions import ContextWindowOverflowException, MaxTokensReachedException
from .conversation_manager import ConversationManager
from .recover_tool_use_on_max_tokens_reached import recover_tool_use_on_max_tokens_reached

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -112,6 +113,16 @@ def reduce_context(self, agent: "Agent", e: Optional[Exception] = None, **kwargs
# Overwrite message history
messages[:] = messages[trim_index:]

async def handle_token_limit_reached(self, agent: "Agent", e: MaxTokensReachedException, **kwargs: Any) -> None:
    """Recover conversation state after a MaxTokensReachedException.

    Delegates to ``recover_tool_use_on_max_tokens_reached``. No window trimming is performed by this
    method itself.

    Args:
        agent: The agent whose conversation state will be recovered.
        e: The MaxTokensReachedException that triggered the recovery.
        **kwargs: Additional keyword arguments for future extensibility.
    """
    # kwargs are accepted to match the interface but are not forwarded to the helper.
    await recover_tool_use_on_max_tokens_reached(agent, e)

def _truncate_tool_results(self, messages: Messages, msg_idx: int) -> bool:
"""Truncate tool results in a message to reduce context size.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from typing_extensions import override

from ...types.content import Message
from ...types.exceptions import ContextWindowOverflowException
from ...types.exceptions import ContextWindowOverflowException, MaxTokensReachedException
from .conversation_manager import ConversationManager
from .recover_tool_use_on_max_tokens_reached import recover_tool_use_on_max_tokens_reached

if TYPE_CHECKING:
from ..agent import Agent
Expand Down Expand Up @@ -166,6 +167,16 @@ def reduce_context(self, agent: "Agent", e: Optional[Exception] = None, **kwargs
logger.error("Summarization failed: %s", summarization_error)
raise summarization_error from e

async def handle_token_limit_reached(self, agent: "Agent", e: MaxTokensReachedException, **kwargs: Any) -> None:
    """Recover conversation state after a MaxTokensReachedException.

    Delegates to ``recover_tool_use_on_max_tokens_reached``. No summarization is performed by this
    method itself.

    Args:
        agent: The agent whose conversation state will be recovered.
        e: The MaxTokensReachedException that triggered the recovery.
        **kwargs: Additional keyword arguments for future extensibility.
    """
    # kwargs are accepted to match the interface but are not forwarded to the helper.
    await recover_tool_use_on_max_tokens_reached(agent, e)

def _generate_summary(self, messages: List[Message], agent: "Agent") -> Message:
"""Generate a summary of the provided messages.

Expand Down
1 change: 1 addition & 0 deletions tests/strands/agent/conversation_manager/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Test package for conversation manager
Loading
Loading