From 3fc1381e76f359bac23741bd13b86d3d005b4067 Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Tue, 14 Oct 2025 15:34:28 -0700 Subject: [PATCH 1/6] feat: enhance AnthropicCompletion class with additional client parameters and tool handling - Added support for client_params in the AnthropicCompletion class to allow for additional client configuration. - Refactored client initialization to use a dedicated method for retrieving client parameters. - Implemented a new method to handle tool use conversation flow, ensuring proper execution and response handling. - Introduced comprehensive test cases to validate the functionality of the AnthropicCompletion class, including tool use scenarios and parameter handling. --- .../llms/providers/anthropic/completion.py | 259 +++++-- .../tests/llms/anthropic/test_anthropic.py | 660 ++++++++++++++++++ 2 files changed, 879 insertions(+), 40 deletions(-) create mode 100644 lib/crewai/tests/llms/anthropic/test_anthropic.py diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 691490dd29..a90f06573c 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -40,6 +40,7 @@ def __init__( top_p: float | None = None, stop_sequences: list[str] | None = None, stream: bool = False, + client_params: dict[str, Any] | None = None, **kwargs, ): """Initialize Anthropic chat completion client. @@ -55,19 +56,20 @@ def __init__( top_p: Nucleus sampling parameter stop_sequences: Stop sequences (Anthropic uses stop_sequences, not stop) stream: Enable streaming responses + client_params: Additional parameters for the Anthropic client **kwargs: Additional parameters """ super().__init__( model=model, temperature=temperature, stop=stop_sequences or [], **kwargs ) - # Initialize Anthropic client - self.client = Anthropic( - api_key=api_key or os.getenv("ANTHROPIC_API_KEY"), - base_url=base_url, - timeout=timeout, - max_retries=max_retries, - ) + # Client params + self.client_params = client_params + self.base_url = base_url + self.timeout = timeout + self.max_retries = max_retries + + self.client = Anthropic(**self._get_client_params()) # Store completion parameters self.max_tokens = max_tokens @@ -79,6 +81,26 @@ def __init__( self.is_claude_3 = "claude-3" in model.lower() self.supports_tools = self.is_claude_3 # Claude 3+ supports tool use + def _get_client_params(self) -> dict[str, Any]: + """Get client parameters.""" + + if self.api_key is None: + self.api_key = os.getenv("ANTHROPIC_API_KEY") + if self.api_key is None: + raise ValueError("ANTHROPIC_API_KEY is required") + + client_params = { + "api_key": self.api_key, + "base_url": self.base_url, + "timeout": self.timeout, + "max_retries": self.max_retries, + } + + if self.client_params: + client_params.update(self.client_params) + + return client_params + def call( self, messages: str | list[dict[str, str]], @@ -102,6 +124,7 @@ def call( Chat completion response or tool call result """ try: + print("we are calling", messages) # Emit call started event self._emit_call_started_event( messages=messages, @@ -121,6 +144,7 @@ def call( completion_params = self._prepare_completion_params( formatted_messages, system_message, tools ) + print("completion_params", completion_params) # Handle streaming vs non-streaming if self.stream: @@ -183,12 +207,25 @@ def _prepare_completion_params( def _convert_tools_for_interference(self, tools: list[dict]) -> list[dict]: 
"""Convert CrewAI tool format to Anthropic tool use format.""" - from crewai.llms.providers.utils.common import safe_tool_conversion - anthropic_tools = [] for tool in tools: - name, description, parameters = safe_tool_conversion(tool, "Anthropic") + if "input_schema" in tool and "name" in tool and "description" in tool: + anthropic_tools.append(tool) + continue + + try: + from crewai.llms.providers.utils.common import safe_tool_conversion + + name, description, parameters = safe_tool_conversion(tool, "Anthropic") + except (ImportError, Exception): + name = tool.get("name", "unknown_tool") + description = tool.get("description", "A tool function") + parameters = ( + tool.get("input_schema") + or tool.get("parameters") + or tool.get("schema") + ) anthropic_tool = { "name": name, @@ -196,7 +233,13 @@ def _convert_tools_for_interference(self, tools: list[dict]) -> list[dict]: } if parameters and isinstance(parameters, dict): - anthropic_tool["input_schema"] = parameters # type: ignore + anthropic_tool["input_schema"] = parameters + else: + anthropic_tool["input_schema"] = { + "type": "object", + "properties": {}, + "required": [], + } anthropic_tools.append(anthropic_tool) @@ -229,13 +272,11 @@ def _format_messages_for_anthropic( content = message.get("content", "") if role == "system": - # Extract system message - Anthropic handles it separately if system_message: system_message += f"\n\n{content}" else: system_message = content else: - # Add user/assistant messages - ensure both role and content are str, not None role_str = role if role is not None else "user" content_str = content if content is not None else "" formatted_messages.append({"role": role_str, "content": content_str}) @@ -259,6 +300,7 @@ def _handle_completion( ) -> str | Any: """Handle non-streaming message completion.""" try: + print("params", params) response: Message = self.client.messages.create(**params) except Exception as e: @@ -270,22 +312,22 @@ def _handle_completion( usage = self._extract_anthropic_token_usage(response) self._track_token_usage_internal(usage) + # Check if Claude wants to use tools if response.content and available_functions: - for content_block in response.content: - if isinstance(content_block, ToolUseBlock): - function_name = content_block.name - function_args = content_block.input - - result = self._handle_tool_execution( - function_name=function_name, - function_args=function_args, # type: ignore - available_functions=available_functions, - from_task=from_task, - from_agent=from_agent, - ) - - if result is not None: - return result + tool_uses = [ + block for block in response.content if isinstance(block, ToolUseBlock) + ] + + if tool_uses: + # Handle tool use conversation flow + return self._handle_tool_use_conversation( + response, + tool_uses, + params, + available_functions, + from_task, + from_agent, + ) # Extract text content content = "" @@ -350,26 +392,54 @@ def _handle_streaming_completion( # Handle completed tool uses if tool_uses and available_functions: - for tool_data in tool_uses.values(): - function_name = tool_data["name"] - + # Convert streamed tool uses to ToolUseBlock-like objects for consistency + tool_use_blocks = [] + for tool_id, tool_data in tool_uses.items(): try: function_args = json.loads(tool_data["input"]) except json.JSONDecodeError as e: logging.error(f"Failed to parse streamed tool arguments: {e}") continue - # Execute tool - result = self._handle_tool_execution( - function_name=function_name, - function_args=function_args, - 
available_functions=available_functions, - from_task=from_task, - from_agent=from_agent, + # Create a mock ToolUseBlock-like object + class MockToolUse: + def __init__(self, tool_id: str, name: str, input_args: dict): + self.id = tool_id + self.name = name + self.input = input_args + + tool_use_blocks.append( + MockToolUse(tool_id, tool_data["name"], function_args) ) - if result is not None: - return result + if tool_use_blocks: + # Create a mock response object for the tool conversation flow + class MockResponse: + def __init__(self, content_blocks): + self.content = content_blocks + + # Combine text content and tool uses in the response + response_content = [] + if full_response.strip(): # Add text content if any + + class MockTextBlock: + def __init__(self, text: str): + self.text = text + + response_content.append(MockTextBlock(full_response)) + + response_content.extend(tool_use_blocks) + mock_response = MockResponse(response_content) + + # Handle tool use conversation flow + return self._handle_tool_use_conversation( + mock_response, + tool_use_blocks, + params, + available_functions, + from_task, + from_agent, + ) # Apply stop words to full response full_response = self._apply_stop_words(full_response) @@ -385,6 +455,115 @@ def _handle_streaming_completion( return full_response + def _handle_tool_use_conversation( + self, + initial_response: Message + | Any, # Can be Message or mock response from streaming + tool_uses: list[ToolUseBlock] + | list[Any], # Can be ToolUseBlock or mock objects + params: dict[str, Any], + available_functions: dict[str, Any], + from_task: Any | None = None, + from_agent: Any | None = None, + ) -> str: + """Handle the complete tool use conversation flow. + + This implements the proper Anthropic tool use pattern: + 1. Claude requests tool use + 2. We execute the tools + 3. We send tool results back to Claude + 4. 
Claude processes results and generates final response
+        """
+        # Execute all requested tools and collect results
+        tool_results = []
+
+        for tool_use in tool_uses:
+            function_name = tool_use.name
+            function_args = tool_use.input
+
+            # Execute the tool
+            result = self._handle_tool_execution(
+                function_name=function_name,
+                function_args=function_args,  # type: ignore
+                available_functions=available_functions,
+                from_task=from_task,
+                from_agent=from_agent,
+            )
+
+            # Create tool result in Anthropic format
+            tool_result = {
+                "type": "tool_result",
+                "tool_use_id": tool_use.id,
+                "content": str(result)
+                if result is not None
+                else "Tool execution completed",
+            }
+            tool_results.append(tool_result)
+
+        # Prepare follow-up conversation with tool results
+        follow_up_params = params.copy()
+
+        # Add Claude's tool use response to conversation
+        assistant_message = {"role": "assistant", "content": initial_response.content}
+
+        # Add user message with tool results
+        user_message = {"role": "user", "content": tool_results}
+
+        # Update messages for follow-up call
+        follow_up_params["messages"] = params["messages"] + [
+            assistant_message,
+            user_message,
+        ]
+
+        try:
+            # Send tool results back to Claude for final response
+            final_response: Message = self.client.messages.create(**follow_up_params)
+
+            # Track token usage for follow-up call
+            follow_up_usage = self._extract_anthropic_token_usage(final_response)
+            self._track_token_usage_internal(follow_up_usage)
+
+            # Extract final text content
+            final_content = ""
+            if final_response.content:
+                for content_block in final_response.content:
+                    if hasattr(content_block, "text"):
+                        final_content += content_block.text
+
+            final_content = self._apply_stop_words(final_content)
+
+            # Emit completion event for the final response
+            self._emit_call_completed_event(
+                response=final_content,
+                call_type=LLMCallType.LLM_CALL,
+                from_task=from_task,
+                from_agent=from_agent,
+                messages=follow_up_params["messages"],
+            )
+
+            # Log token usage for the follow-up call (the initial call is tracked separately above)
+            total_usage = {
+                "input_tokens": follow_up_usage.get("input_tokens", 0),
+                "output_tokens": follow_up_usage.get("output_tokens", 0),
+                "total_tokens": follow_up_usage.get("total_tokens", 0),
+            }
+
+            if total_usage.get("total_tokens", 0) > 0:
+                logging.info(f"Anthropic API tool conversation usage: {total_usage}")
+
+            return final_content
+
+        except Exception as e:
+            if is_context_length_exceeded(e):
+                logging.error(f"Context window exceeded in tool follow-up: {e}")
+                raise LLMContextLengthExceededError(str(e)) from e
+
+            logging.error(f"Tool follow-up conversation failed: {e}")
+            # Fallback: return the first tool result if the follow-up call fails
+            if tool_results:
+                return tool_results[0]["content"]
+            raise
+
     def supports_function_calling(self) -> bool:
         """Check if the model supports function calling."""
         return self.supports_tools
diff --git a/lib/crewai/tests/llms/anthropic/test_anthropic.py b/lib/crewai/tests/llms/anthropic/test_anthropic.py
new file mode 100644
index 0000000000..7d0780561d
--- /dev/null
+++ b/lib/crewai/tests/llms/anthropic/test_anthropic.py
@@ -0,0 +1,660 @@
+import os
+import sys
+import types
+from unittest.mock import patch, MagicMock
+import pytest
+
+from crewai.llm import LLM
+from crewai.llms.providers.anthropic.completion import AnthropicCompletion
+from crewai.crew import Crew
+from crewai.agent import Agent
+from crewai.task import Task
+from crewai.cli.constants import DEFAULT_LLM_MODEL
+
+
+def test_anthropic_completion_is_used_when_anthropic_provider():
+    """
+    Test that AnthropicCompletion 
from completion.py is used when LLM uses provider 'anthropic' + """ + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + assert llm.__class__.__name__ == "AnthropicCompletion" + assert llm.provider == "anthropic" + assert llm.model == "claude-3-5-sonnet-20241022" + + +def test_anthropic_completion_is_used_when_claude_provider(): + """ + Test that AnthropicCompletion is used when provider is 'claude' + """ + llm = LLM(model="claude/claude-3-5-sonnet-20241022") + + from crewai.llms.providers.anthropic.completion import AnthropicCompletion + assert isinstance(llm, AnthropicCompletion) + assert llm.provider == "claude" + assert llm.model == "claude-3-5-sonnet-20241022" + + + + +def test_anthropic_tool_use_conversation_flow(): + """ + Test that the Anthropic completion properly handles tool use conversation flow + """ + from unittest.mock import Mock, patch + from crewai.llms.providers.anthropic.completion import AnthropicCompletion + from anthropic.types.tool_use_block import ToolUseBlock + + # Create AnthropicCompletion instance + completion = AnthropicCompletion(model="claude-3-5-sonnet-20241022") + + # Mock tool function + def mock_weather_tool(location: str) -> str: + return f"The weather in {location} is sunny and 75°F" + + available_functions = {"get_weather": mock_weather_tool} + + # Mock the Anthropic client responses + with patch.object(completion.client.messages, 'create') as mock_create: + # Mock initial response with tool use - need to properly mock ToolUseBlock + mock_tool_use = Mock(spec=ToolUseBlock) + mock_tool_use.id = "tool_123" + mock_tool_use.name = "get_weather" + mock_tool_use.input = {"location": "San Francisco"} + + mock_initial_response = Mock() + mock_initial_response.content = [mock_tool_use] + mock_initial_response.usage = Mock() + mock_initial_response.usage.input_tokens = 100 + mock_initial_response.usage.output_tokens = 50 + + # Mock final response after tool result - properly mock text content + mock_text_block = Mock() + # Set the text attribute as a string, not another Mock + mock_text_block.configure_mock(text="Based on the weather data, it's a beautiful day in San Francisco with sunny skies and 75°F temperature.") + + mock_final_response = Mock() + mock_final_response.content = [mock_text_block] + mock_final_response.usage = Mock() + mock_final_response.usage.input_tokens = 150 + mock_final_response.usage.output_tokens = 75 + + # Configure mock to return different responses on successive calls + mock_create.side_effect = [mock_initial_response, mock_final_response] + + # Test the call + messages = [{"role": "user", "content": "What's the weather like in San Francisco?"}] + result = completion.call( + messages=messages, + available_functions=available_functions + ) + + # Verify the result contains the final response + assert "beautiful day in San Francisco" in result + assert "sunny skies" in result + assert "75°F" in result + + # Verify that two API calls were made (initial + follow-up) + assert mock_create.call_count == 2 + + # Verify the second call includes tool results + second_call_args = mock_create.call_args_list[1][1] # kwargs of second call + messages_in_second_call = second_call_args["messages"] + + # Should have original user message + assistant tool use + user tool result + assert len(messages_in_second_call) == 3 + assert messages_in_second_call[0]["role"] == "user" + assert messages_in_second_call[1]["role"] == "assistant" + assert messages_in_second_call[2]["role"] == "user" + + # Verify tool result format + tool_result = 
messages_in_second_call[2]["content"][0] + assert tool_result["type"] == "tool_result" + assert tool_result["tool_use_id"] == "tool_123" + assert "sunny and 75°F" in tool_result["content"] + + +def test_anthropic_completion_module_is_imported(): + """ + Test that the completion module is properly imported when using Anthropic provider + """ + module_name = "crewai.llms.providers.anthropic.completion" + + # Remove module from cache if it exists + if module_name in sys.modules: + del sys.modules[module_name] + + # Create LLM instance - this should trigger the import + LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Verify the module was imported + assert module_name in sys.modules + completion_mod = sys.modules[module_name] + assert isinstance(completion_mod, types.ModuleType) + + # Verify the class exists in the module + assert hasattr(completion_mod, 'AnthropicCompletion') + + +def test_fallback_to_litellm_when_native_anthropic_fails(): + """ + Test that LLM falls back to LiteLLM when native Anthropic completion fails + """ + # Mock the _get_native_provider to return a failing class + with patch('crewai.llm.LLM._get_native_provider') as mock_get_provider: + + class FailingCompletion: + def __init__(self, *args, **kwargs): + raise Exception("Native Anthropic SDK failed") + + mock_get_provider.return_value = FailingCompletion + + # This should fall back to LiteLLM + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Check that it's using LiteLLM + assert hasattr(llm, 'is_litellm') + assert llm.is_litellm == True + + +def test_anthropic_completion_initialization_parameters(): + """ + Test that AnthropicCompletion is initialized with correct parameters + """ + llm = LLM( + model="anthropic/claude-3-5-sonnet-20241022", + temperature=0.7, + max_tokens=2000, + top_p=0.9, + api_key="test-key" + ) + + from crewai.llms.providers.anthropic.completion import AnthropicCompletion + assert isinstance(llm, AnthropicCompletion) + assert llm.model == "claude-3-5-sonnet-20241022" + assert llm.temperature == 0.7 + assert llm.max_tokens == 2000 + assert llm.top_p == 0.9 + + +def test_anthropic_specific_parameters(): + """ + Test Anthropic-specific parameters like stop_sequences and streaming + """ + llm = LLM( + model="anthropic/claude-3-5-sonnet-20241022", + stop_sequences=["Human:", "Assistant:"], + stream=True, + max_retries=5, + timeout=60 + ) + + from crewai.llms.providers.anthropic.completion import AnthropicCompletion + assert isinstance(llm, AnthropicCompletion) + assert llm.stop_sequences == ["Human:", "Assistant:"] + assert llm.stream == True + assert llm.client.max_retries == 5 + assert llm.client.timeout == 60 + + +def test_anthropic_completion_call(): + """ + Test that AnthropicCompletion call method works + """ + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Mock the call method on the instance + with patch.object(llm, 'call', return_value="Hello! I'm Claude, ready to help.") as mock_call: + result = llm.call("Hello, how are you?") + + assert result == "Hello! I'm Claude, ready to help." 
+ mock_call.assert_called_once_with("Hello, how are you?") + + +def test_anthropic_completion_called_during_crew_execution(): + """ + Test that AnthropicCompletion.call is actually invoked when running a crew + """ + # Create the LLM instance first + anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Mock the call method on the specific instance + with patch.object(anthropic_llm, 'call', return_value="Tokyo has 14 million people.") as mock_call: + + # Create agent with explicit LLM configuration + agent = Agent( + role="Research Assistant", + goal="Find population info", + backstory="You research populations.", + llm=anthropic_llm, + ) + + task = Task( + description="Find Tokyo population", + expected_output="Population number", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + result = crew.kickoff() + + # Verify mock was called + assert mock_call.called + assert "14 million" in str(result) + + +def test_anthropic_completion_call_arguments(): + """ + Test that AnthropicCompletion.call is invoked with correct arguments + """ + # Create LLM instance first + anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Mock the instance method + with patch.object(anthropic_llm, 'call') as mock_call: + mock_call.return_value = "Task completed successfully." + + agent = Agent( + role="Test Agent", + goal="Complete a simple task", + backstory="You are a test agent.", + llm=anthropic_llm # Use same instance + ) + + task = Task( + description="Say hello world", + expected_output="Hello world", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + crew.kickoff() + + # Verify call was made + assert mock_call.called + + # Check the arguments passed to the call method + call_args = mock_call.call_args + assert call_args is not None + + # The first argument should be the messages + messages = call_args[0][0] # First positional argument + assert isinstance(messages, (str, list)) + + # Verify that the task description appears in the messages + if isinstance(messages, str): + assert "hello world" in messages.lower() + elif isinstance(messages, list): + message_content = str(messages).lower() + assert "hello world" in message_content + + +def test_multiple_anthropic_calls_in_crew(): + """ + Test that AnthropicCompletion.call is invoked multiple times for multiple tasks + """ + # Create LLM instance first + anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Mock the instance method + with patch.object(anthropic_llm, 'call') as mock_call: + mock_call.return_value = "Task completed." 
+ + agent = Agent( + role="Multi-task Agent", + goal="Complete multiple tasks", + backstory="You can handle multiple tasks.", + llm=anthropic_llm # Use same instance + ) + + task1 = Task( + description="First task", + expected_output="First result", + agent=agent, + ) + + task2 = Task( + description="Second task", + expected_output="Second result", + agent=agent, + ) + + crew = Crew( + agents=[agent], + tasks=[task1, task2] + ) + crew.kickoff() + + # Verify multiple calls were made + assert mock_call.call_count >= 2 # At least one call per task + + # Verify each call had proper arguments + for call in mock_call.call_args_list: + assert len(call[0]) > 0 # Has positional arguments + messages = call[0][0] + assert messages is not None + + +def test_anthropic_completion_with_tools(): + """ + Test that AnthropicCompletion.call is invoked with tools when agent has tools + """ + from crewai.tools import tool + + @tool + def sample_tool(query: str) -> str: + """A sample tool for testing""" + return f"Tool result for: {query}" + + # Create LLM instance first + anthropic_llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Mock the instance method + with patch.object(anthropic_llm, 'call') as mock_call: + mock_call.return_value = "Task completed with tools." + + agent = Agent( + role="Tool User", + goal="Use tools to complete tasks", + backstory="You can use tools.", + llm=anthropic_llm, # Use same instance + tools=[sample_tool] + ) + + task = Task( + description="Use the sample tool", + expected_output="Tool usage result", + agent=agent, + ) + + crew = Crew(agents=[agent], tasks=[task]) + crew.kickoff() + + assert mock_call.called + + call_args = mock_call.call_args + call_kwargs = call_args[1] if len(call_args) > 1 else {} + + if 'tools' in call_kwargs: + assert call_kwargs['tools'] is not None + assert len(call_kwargs['tools']) > 0 + + +def test_anthropic_raises_error_when_model_not_supported(): + """Test that AnthropicCompletion raises ValueError when model not supported""" + + # Mock the Anthropic client to raise an error + with patch('crewai.llms.providers.anthropic.completion.Anthropic') as mock_anthropic_class: + mock_client = MagicMock() + mock_anthropic_class.return_value = mock_client + + # Mock the error that Anthropic would raise for unsupported models + from anthropic import NotFoundError + mock_client.messages.create.side_effect = NotFoundError( + message="The model `model-doesnt-exist` does not exist", + response=MagicMock(), + body={} + ) + + llm = LLM(model="anthropic/model-doesnt-exist") + + with pytest.raises(Exception): # Should raise some error for unsupported model + llm.call("Hello") + + +def test_anthropic_client_params_setup(): + """ + Test that client_params are properly merged with default client parameters + """ + # Use only valid Anthropic client parameters + custom_client_params = { + "default_headers": {"X-Custom-Header": "test-value"}, + } + + with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}): + llm = LLM( + model="anthropic/claude-3-5-sonnet-20241022", + api_key="test-key", + base_url="https://custom-api.com", + timeout=45, + max_retries=5, + client_params=custom_client_params + ) + + from crewai.llms.providers.anthropic.completion import AnthropicCompletion + assert isinstance(llm, AnthropicCompletion) + + assert llm.client_params == custom_client_params + + merged_params = llm._get_client_params() + + assert merged_params["api_key"] == "test-key" + assert merged_params["base_url"] == "https://custom-api.com" + assert merged_params["timeout"] 
== 45
+        assert merged_params["max_retries"] == 5
+
+        assert merged_params["default_headers"] == {"X-Custom-Header": "test-value"}
+
+
+def test_anthropic_client_params_override_defaults():
+    """
+    Test that client_params can override default client parameters
+    """
+    override_client_params = {
+        "timeout": 120,  # Override the timeout parameter
+        "max_retries": 10,  # Override the max_retries parameter
+        "default_headers": {"X-Override": "true"}  # Valid custom parameter
+    }
+
+    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
+        llm = LLM(
+            model="anthropic/claude-3-5-sonnet-20241022",
+            api_key="test-key",
+            timeout=30,
+            max_retries=3,
+            client_params=override_client_params
+        )
+
+        # Verify this is actually AnthropicCompletion, not LiteLLM fallback
+        from crewai.llms.providers.anthropic.completion import AnthropicCompletion
+        assert isinstance(llm, AnthropicCompletion)
+
+        merged_params = llm._get_client_params()
+
+        # client_params should override the individual parameters
+        assert merged_params["timeout"] == 120
+        assert merged_params["max_retries"] == 10
+        assert merged_params["default_headers"] == {"X-Override": "true"}
+
+
+def test_anthropic_client_params_none():
+    """
+    Test that client_params=None works correctly (no additional parameters)
+    """
+    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
+        llm = LLM(
+            model="anthropic/claude-3-5-sonnet-20241022",
+            api_key="test-key",
+            base_url="https://api.anthropic.com",
+            timeout=60,
+            max_retries=2,
+            client_params=None
+        )
+
+        from crewai.llms.providers.anthropic.completion import AnthropicCompletion
+        assert isinstance(llm, AnthropicCompletion)
+
+        assert llm.client_params is None
+
+        merged_params = llm._get_client_params()
+
+        expected_keys = {"api_key", "base_url", "timeout", "max_retries"}
+        assert set(merged_params.keys()) == expected_keys
+
+        # Defaults pass through unchanged when client_params is None
+        assert merged_params["api_key"] == "test-key"
+        assert merged_params["base_url"] == "https://api.anthropic.com"
+        assert merged_params["timeout"] == 60
+        assert merged_params["max_retries"] == 2
+
+
+def test_anthropic_client_params_empty_dict():
+    """
+    Test that client_params={} works correctly (empty additional parameters)
+    """
+    with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
+        llm = LLM(
+            model="anthropic/claude-3-5-sonnet-20241022",
+            api_key="test-key",
+            client_params={}
+        )
+
+        from crewai.llms.providers.anthropic.completion import AnthropicCompletion
+        assert isinstance(llm, AnthropicCompletion)
+
+        assert llm.client_params == {}
+
+        merged_params = llm._get_client_params()
+
+        assert "api_key" in merged_params
+        assert merged_params["api_key"] == "test-key"
+
+
+def test_anthropic_model_detection():
+    """
+    Test that various Anthropic model formats are properly detected
+    """
+    # Test Anthropic model naming patterns that actually work with provider detection
+    anthropic_test_cases = [
+        "anthropic/claude-3-5-sonnet-20241022",
+        "claude/claude-3-5-sonnet-20241022"
+    ]
+
+    for model_name in anthropic_test_cases:
+        llm = LLM(model=model_name)
+        from crewai.llms.providers.anthropic.completion import AnthropicCompletion
+        assert isinstance(llm, AnthropicCompletion), f"Failed for model: {model_name}"
+
+
+def test_anthropic_supports_stop_words():
+    """
+    Test that Anthropic models support stop sequences
+    """
+    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")
+    assert llm.supports_stop_words() == True
+
+
+def 
test_anthropic_context_window_size(): + """ + Test that Anthropic models return correct context window sizes + """ + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + context_size = llm.get_context_window_size() + + # Should return a reasonable context window size (Claude 3.5 has 200k tokens) + assert context_size > 100000 # Should be substantial + assert context_size <= 200000 # But not exceed the actual limit + + +def test_anthropic_message_formatting(): + """ + Test that messages are properly formatted for Anthropic API + """ + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Test message formatting + test_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there!"}, + {"role": "user", "content": "How are you?"} + ] + + formatted_messages, system_message = llm._format_messages_for_anthropic(test_messages) + + # System message should be extracted + assert system_message == "You are a helpful assistant." + + # Remaining messages should start with user + assert formatted_messages[0]["role"] == "user" + assert len(formatted_messages) >= 3 # Should have user, assistant, user messages + + +def test_anthropic_streaming_parameter(): + """ + Test that streaming parameter is properly handled + """ + # Test non-streaming + llm_no_stream = LLM(model="anthropic/claude-3-5-sonnet-20241022", stream=False) + assert llm_no_stream.stream == False + + # Test streaming + llm_stream = LLM(model="anthropic/claude-3-5-sonnet-20241022", stream=True) + assert llm_stream.stream == True + + +def test_anthropic_tool_conversion(): + """ + Test that tools are properly converted to Anthropic format + """ + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Mock tool in CrewAI format + crewai_tools = [{ + "type": "function", + "function": { + "name": "test_tool", + "description": "A test tool", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Search query"} + }, + "required": ["query"] + } + } + }] + + # Test tool conversion + anthropic_tools = llm._convert_tools_for_interference(crewai_tools) + + assert len(anthropic_tools) == 1 + assert anthropic_tools[0]["name"] == "test_tool" + assert anthropic_tools[0]["description"] == "A test tool" + assert "input_schema" in anthropic_tools[0] + + +def test_anthropic_environment_variable_api_key(): + """ + Test that Anthropic API key is properly loaded from environment + """ + with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-anthropic-key"}): + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + assert llm.client is not None + assert hasattr(llm.client, 'messages') + + +def test_anthropic_token_usage_tracking(): + """ + Test that token usage is properly tracked for Anthropic responses + """ + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + # Mock the Anthropic response with usage information + with patch.object(llm.client.messages, 'create') as mock_create: + mock_response = MagicMock() + mock_response.content = [MagicMock(text="test response")] + mock_response.usage = MagicMock(input_tokens=50, output_tokens=25) + mock_create.return_value = mock_response + + result = llm.call("Hello") + + # Verify the response + assert result == "test response" + + # Verify token usage was extracted + usage = llm._extract_anthropic_token_usage(mock_response) + assert usage["input_tokens"] == 50 + assert usage["output_tokens"] == 25 + assert usage["total_tokens"] == 75 From 
7045ed389ae5bd9b4a3fc29002d4bc238772c88a Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Tue, 14 Oct 2025 15:36:30 -0700 Subject: [PATCH 2/6] drop print statements --- lib/crewai/src/crewai/llms/providers/anthropic/completion.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index a90f06573c..ffcaf30773 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -124,7 +124,6 @@ def call( Chat completion response or tool call result """ try: - print("we are calling", messages) # Emit call started event self._emit_call_started_event( messages=messages, @@ -144,7 +143,6 @@ def call( completion_params = self._prepare_completion_params( formatted_messages, system_message, tools ) - print("completion_params", completion_params) # Handle streaming vs non-streaming if self.stream: @@ -300,7 +298,6 @@ def _handle_completion( ) -> str | Any: """Handle non-streaming message completion.""" try: - print("params", params) response: Message = self.client.messages.create(**params) except Exception as e: From 97c2cbd11069d4cef4db37f7bd1d70e967af39e1 Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Wed, 15 Oct 2025 11:12:35 -0700 Subject: [PATCH 3/6] test: add fixture to mock ANTHROPIC_API_KEY for tests - Introduced a pytest fixture to automatically mock the ANTHROPIC_API_KEY environment variable for all tests in the test_anthropic.py module. - This change ensures that tests can run without requiring a real API key, improving test isolation and reliability. --- lib/crewai/tests/llms/anthropic/test_anthropic.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/crewai/tests/llms/anthropic/test_anthropic.py b/lib/crewai/tests/llms/anthropic/test_anthropic.py index 7d0780561d..90a0eb766d 100644 --- a/lib/crewai/tests/llms/anthropic/test_anthropic.py +++ b/lib/crewai/tests/llms/anthropic/test_anthropic.py @@ -5,11 +5,16 @@ import pytest from crewai.llm import LLM -from crewai.llms.providers.anthropic.completion import AnthropicCompletion from crewai.crew import Crew from crewai.agent import Agent from crewai.task import Task -from crewai.cli.constants import DEFAULT_LLM_MODEL + + +@pytest.fixture(autouse=True) +def mock_anthropic_api_key(): + """Automatically mock ANTHROPIC_API_KEY for all tests in this module.""" + with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}): + yield def test_anthropic_completion_is_used_when_anthropic_provider(): From 3261cd39b3baa8cea6339ed5939a613c8b45bbb4 Mon Sep 17 00:00:00 2001 From: lorenzejay Date: Thu, 16 Oct 2025 10:22:48 -0700 Subject: [PATCH 4/6] refactor: streamline streaming message handling in AnthropicCompletion class - Removed the 'stream' parameter from the API call as it is set internally by the SDK. - Simplified the handling of tool use events and response construction by extracting token usage from the final message. - Enhanced the flow for managing tool use conversation, ensuring proper integration with the streaming API response. 
--- .../llms/providers/anthropic/completion.py | 82 +++++-------------- 1 file changed, 19 insertions(+), 63 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index ffcaf30773..37eed9f794 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -1,4 +1,3 @@ -import json import logging import os from typing import Any @@ -357,12 +356,14 @@ def _handle_streaming_completion( ) -> str: """Handle streaming message completion.""" full_response = "" - tool_uses = {} + + # Remove 'stream' parameter as messages.stream() doesn't accept it + # (the SDK sets it internally) + stream_params = {k: v for k, v in params.items() if k != "stream"} # Make streaming API call - with self.client.messages.stream(**params) as stream: + with self.client.messages.stream(**stream_params) as stream: for event in stream: - # Handle content delta events if hasattr(event, "delta") and hasattr(event.delta, "text"): text_delta = event.delta.text full_response += text_delta @@ -372,66 +373,23 @@ def _handle_streaming_completion( from_agent=from_agent, ) - # Handle tool use events - elif hasattr(event, "delta") and hasattr(event.delta, "partial_json"): - # Tool use streaming - accumulate JSON - tool_id = getattr(event, "index", "default") - if tool_id not in tool_uses: - tool_uses[tool_id] = { - "name": "", - "input": "", - } - - if hasattr(event.delta, "name"): - tool_uses[tool_id]["name"] = event.delta.name - if hasattr(event.delta, "partial_json"): - tool_uses[tool_id]["input"] += event.delta.partial_json - - # Handle completed tool uses - if tool_uses and available_functions: - # Convert streamed tool uses to ToolUseBlock-like objects for consistency - tool_use_blocks = [] - for tool_id, tool_data in tool_uses.items(): - try: - function_args = json.loads(tool_data["input"]) - except json.JSONDecodeError as e: - logging.error(f"Failed to parse streamed tool arguments: {e}") - continue - - # Create a mock ToolUseBlock-like object - class MockToolUse: - def __init__(self, tool_id: str, name: str, input_args: dict): - self.id = tool_id - self.name = name - self.input = input_args - - tool_use_blocks.append( - MockToolUse(tool_id, tool_data["name"], function_args) - ) - - if tool_use_blocks: - # Create a mock response object for the tool conversation flow - class MockResponse: - def __init__(self, content_blocks): - self.content = content_blocks - - # Combine text content and tool uses in the response - response_content = [] - if full_response.strip(): # Add text content if any - - class MockTextBlock: - def __init__(self, text: str): - self.text = text + final_message: Message = stream.get_final_message() - response_content.append(MockTextBlock(full_response)) + usage = self._extract_anthropic_token_usage(final_message) + self._track_token_usage_internal(usage) - response_content.extend(tool_use_blocks) - mock_response = MockResponse(response_content) + if final_message.content and available_functions: + tool_uses = [ + block + for block in final_message.content + if isinstance(block, ToolUseBlock) + ] + if tool_uses: # Handle tool use conversation flow return self._handle_tool_use_conversation( - mock_response, - tool_use_blocks, + final_message, + tool_uses, params, available_functions, from_task, @@ -454,10 +412,8 @@ def __init__(self, text: str): def _handle_tool_use_conversation( self, - initial_response: Message - | Any, # Can be 
Message or mock response from streaming
-        tool_uses: list[ToolUseBlock]
-        | list[Any],  # Can be ToolUseBlock or mock objects
+        initial_response: Message,
+        tool_uses: list[ToolUseBlock],
         params: dict[str, Any],
         available_functions: dict[str, Any],
         from_task: Any | None = None,
         from_agent: Any | None = None,
     ) -> str:

From d84bf3a5190cf99856bdc067cf8a88f2a9ab469b Mon Sep 17 00:00:00 2001
From: lorenzejay
Date: Thu, 16 Oct 2025 10:29:43 -0700
Subject: [PATCH 5/6] fix: streamed chunk printing in event listener

---
 lib/crewai/src/crewai/events/event_listener.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/crewai/src/crewai/events/event_listener.py b/lib/crewai/src/crewai/events/event_listener.py
index 8adbc21bbd..b98dccac9d 100644
--- a/lib/crewai/src/crewai/events/event_listener.py
+++ b/lib/crewai/src/crewai/events/event_listener.py
@@ -386,7 +386,7 @@ def on_llm_stream_chunk(source, event: LLMStreamChunkEvent):
 
                 # Read from the in-memory stream
                 content = self.text_stream.read()
-                _printer.print(content, end="", flush=True)
+                _printer.print(content)
                 self.next_chunk = self.text_stream.tell()
 
     # ----------- LLM GUARDRAIL EVENTS -----------

From 40461da284fb502fe611d7d64c1a8521b0adb579 Mon Sep 17 00:00:00 2001
From: lorenzejay
Date: Thu, 16 Oct 2025 10:31:05 -0700
Subject: [PATCH 6/6] fix: improve error handling in tool conversion for AnthropicCompletion class

- Enhanced exception handling during tool conversion by catching KeyError and ValueError.
- Added logging for conversion errors to aid in debugging and maintain robustness in tool integration.

---
 .../src/crewai/llms/providers/anthropic/completion.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
index 37eed9f794..e863715cf0 100644
--- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
@@ -215,14 +215,9 @@ def _convert_tools_for_interference(self, tools: list[dict]) -> list[dict]:
                 from crewai.llms.providers.utils.common import safe_tool_conversion
 
                 name, description, parameters = safe_tool_conversion(tool, "Anthropic")
-            except (ImportError, Exception):
-                name = tool.get("name", "unknown_tool")
-                description = tool.get("description", "A tool function")
-                parameters = (
-                    tool.get("input_schema")
-                    or tool.get("parameters")
-                    or tool.get("schema")
-                )
+            except (ImportError, KeyError, ValueError) as e:
+                logging.error(f"Error converting tool to Anthropic format: {e}")
+                raise
 
             anthropic_tool = {
                 "name": name,