Merged

Commits (27)
3096f9f  adding functionality to require tool calling (adrianlyjak, May 7, 2025)
a480059  add anthropic (adrianlyjak, May 14, 2025)
b4a4e75  add azure fix (adrianlyjak, May 14, 2025)
c3178f5  update bedrock (adrianlyjak, May 14, 2025)
9723fb9  add cohere tests (adrianlyjak, May 14, 2025)
1dd85e7  gemini tests (adrianlyjak, May 15, 2025)
3b0c3ea  test huggingface api (adrianlyjak, May 15, 2025)
53babee  test ibm watsonx (adrianlyjak, May 15, 2025)
c5399a6  test litellm (adrianlyjak, May 15, 2025)
c603f95  add mistral tests (adrianlyjak, May 15, 2025)
720be16  add oci data science tests (adrianlyjak, May 23, 2025)
381c492  add tests for oci pt 2 (adrianlyjak, May 23, 2025)
edb097f  test/fix openai (adrianlyjak, May 23, 2025)
f6a88cd  implement tool_config for vertex ai (adrianlyjak, May 23, 2025)
63dac3c  test vertex (adrianlyjak, May 23, 2025)
aea02db  clarify ollama (adrianlyjak, May 23, 2025)
a9181c3  fix gemini imports (adrianlyjak, May 23, 2025)
a1297e8  fix formats (adrianlyjak, May 27, 2025)
9cec0ae  fix bedrock test (adrianlyjak, May 28, 2025)
c9f1851  Fix issues in gemini structured predict, and neaten up the tests (adrianlyjak, May 28, 2025)
07567d7  integration test mistral (adrianlyjak, May 28, 2025)
7740da9  integration test anthropic (adrianlyjak, May 28, 2025)
c3e3b52  integration test cohere (adrianlyjak, May 28, 2025)
d64f9a6  Add openai integration tests (adrianlyjak, May 28, 2025)
3ef48a4  missed google genai tool_required; implement it and add unit/inte… (adrianlyjak, May 28, 2025)
f9b4bad  Update the one reference to tool_choice (adrianlyjak, May 28, 2025)
ca3770d  version bump all of the affected llms (adrianlyjak, May 29, 2025)
2 changes: 1 addition & 1 deletion docs/docs/understanding/extraction/lower_level.md
@@ -13,7 +13,7 @@ resp = llm.chat_with_tools(
[tool],
# chat_history=chat_history, # can optionally pass in chat history instead of user_msg
user_msg="Extract an invoice from the following text: " + text,
# tool_choice="Invoice", # can optionally force the tool call
tool_required=True, # can optionally force the tool call
)

tool_calls = llm.get_tool_calls_from_response(
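For context, here is a minimal end-to-end sketch of the documented flow with the new flag. The `Invoice` schema, the model name, and the input text are illustrative assumptions, not part of the docs page:

```python
from pydantic import BaseModel

from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI


class Invoice(BaseModel):
    """Hypothetical extraction target; the docs page defines its own schema."""

    invoice_number: str
    total: float


def record_invoice(invoice_number: str, total: float) -> Invoice:
    """Record an invoice extracted from text."""
    return Invoice(invoice_number=invoice_number, total=total)


llm = OpenAI(model="gpt-4o-mini")  # any FunctionCallingLLM should work here
tool = FunctionTool.from_defaults(fn=record_invoice, name="Invoice")

text = "Invoice #1234, total due: $99.50"  # stand-in input
resp = llm.chat_with_tools(
    [tool],
    user_msg="Extract an invoice from the following text: " + text,
    tool_required=True,  # the model must call a tool instead of answering in prose
)
tool_calls = llm.get_tool_calls_from_response(resp, error_on_no_tool_call=True)
print([tc.tool_kwargs for tc in tool_calls])
```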
9 changes: 9 additions & 0 deletions llama-index-core/llama_index/core/llms/function_calling.py
@@ -33,6 +33,7 @@ def chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False, # if required, LLM should only call tools, and not return a response
**kwargs: Any,
) -> ChatResponse:
"""Chat with function calling."""
@@ -42,6 +43,7 @@
chat_history=chat_history,
verbose=verbose,
allow_parallel_tool_calls=allow_parallel_tool_calls,
tool_required=tool_required,
**kwargs,
)
response = self.chat(**chat_kwargs)
@@ -59,6 +61,7 @@ async def achat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> ChatResponse:
"""Async chat with function calling."""
@@ -68,6 +71,7 @@
chat_history=chat_history,
verbose=verbose,
allow_parallel_tool_calls=allow_parallel_tool_calls,
tool_required=tool_required,
**kwargs,
)
response = await self.achat(**chat_kwargs)
@@ -85,6 +89,7 @@ def stream_chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> ChatResponseGen:
"""Stream chat with function calling."""
@@ -94,6 +99,7 @@
chat_history=chat_history,
verbose=verbose,
allow_parallel_tool_calls=allow_parallel_tool_calls,
tool_required=tool_required,
**kwargs,
)
# TODO: no validation for streaming outputs
@@ -106,6 +112,7 @@ async def astream_chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> ChatResponseAsyncGen:
"""Async stream chat with function calling."""
@@ -115,6 +122,7 @@
chat_history=chat_history,
verbose=verbose,
allow_parallel_tool_calls=allow_parallel_tool_calls,
tool_required=tool_required,
**kwargs,
)
# TODO: no validation for streaming outputs
@@ -128,6 +136,7 @@ def _prepare_chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False, # if required, LLM should only call tools, and not return a response
**kwargs: Any,
) -> Dict[str, Any]:
"""Prepare the arguments needed to let the LLM chat with tools."""
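Since `_prepare_chat_with_tools` is the single override point, each integration only has to translate the new boolean into its provider's dialect. A minimal sketch for a hypothetical OpenAI-style provider follows; the class name and the `"required"`/`"auto"` mapping are illustrative assumptions, not an actual integration in this PR:

```python
from typing import Any, Dict, List, Optional, Union

from llama_index.core.base.llms.types import ChatMessage
from llama_index.core.llms.function_calling import FunctionCallingLLM
from llama_index.core.tools.types import BaseTool


class MyProviderLLM(FunctionCallingLLM):
    # Abstract members (chat, metadata, etc.) omitted; only the
    # tool_required plumbing is shown.

    def _prepare_chat_with_tools(
        self,
        tools: List[BaseTool],
        user_msg: Optional[Union[str, ChatMessage]] = None,
        chat_history: Optional[List[ChatMessage]] = None,
        verbose: bool = False,
        allow_parallel_tool_calls: bool = False,
        tool_required: bool = False,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        tool_specs = [tool.metadata.to_openai_tool() for tool in tools]
        return {
            "messages": chat_history or [],
            "tools": tool_specs or None,
            # OpenAI-style dialect: "required" forces some tool call,
            # "auto" lets the model decide whether to call a tool at all.
            "tool_choice": "required" if tool_required else "auto",
            **kwargs,
        }
```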
3 changes: 3 additions & 0 deletions llama-index-core/tests/agent/function_calling/test_step.py
@@ -97,6 +97,7 @@ def _prepare_chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> Dict[str, Any]:
"""Prepare chat with tools."""
@@ -109,6 +110,7 @@ def chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> ChatResponse:
return ChatResponse(message=ChatMessage(role="user", content=""))
@@ -120,6 +122,7 @@ async def achat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> ChatResponse:
return ChatResponse(message=ChatMessage(role="user", content=""))
1 change: 1 addition & 0 deletions llama-index-core/tests/llms/test_function_calling.py
@@ -69,6 +69,7 @@ def _prepare_chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> Dict[str, Any]:
return {"messages": []}
@@ -216,6 +216,7 @@ def _prepare_chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False, # ai21 does not support configuring the tool_choice

[Review thread on this line]

logan-markewich (Collaborator): Should we raise an error if it's set to True?

adrianlyjak (Contributor, author): @logan-markewich My thinking is that this will frequently get set more by internal library code than by user code. For example, we'll want to migrate StructuredLLM to set tool_required=True (it currently sets tool_choice="function_name" or similar and hopes for the best from the LLM implementation). It seemed better to have it maybe give a tool response rather than blow up, like it currently does.

adrianlyjak (Contributor, author): Otherwise, the alternative is to somehow advertise whether each LLM supports tool_required or not, and to check that before providing it, which seems like a lot of gymnastics for mostly just a few underused LLMs.

logan-markewich (Collaborator): That's fair!

**kwargs: Any,
) -> Dict[str, Any]:
tool_specs = [tool.metadata.to_openai_tool() for tool in tools]
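Per the thread above, integrations that cannot express the constraint accept the flag for interface compatibility and simply do not forward it. A condensed sketch of that pattern (signature abbreviated for illustration):

```python
def _prepare_chat_with_tools(self, tools, tool_required=False, **kwargs):
    # ai21 exposes no tool_choice equivalent, so tool_required is accepted
    # (keeping the shared FunctionCallingLLM signature uniform) but is
    # deliberately not forwarded; the model may still answer without a tool.
    tool_specs = [tool.metadata.to_openai_tool() for tool in tools]
    return {"tools": tool_specs, **kwargs}
```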
@@ -27,7 +27,7 @@ dev = [

[project]
name = "llama-index-llms-ai21"
version = "0.4.0"
version = "0.5.0"
description = "llama-index llms ai21 integration"
authors = [{name = "Your Name", email = "[email protected]"}]
requires-python = ">=3.9,<4.0"
@@ -641,13 +641,22 @@ async def gen() -> AsyncGenerator[AnthropicCompletionResponse, None]:

return gen()

def _map_tool_choice_to_anthropic(
self, tool_required: bool, allow_parallel_tool_calls: bool
) -> dict:
return {
"disable_parallel_tool_use": not allow_parallel_tool_calls,
"type": "any" if tool_required else "auto",
}

def _prepare_chat_with_tools(
self,
tools: List["BaseTool"],
user_msg: Optional[Union[str, ChatMessage]] = None,
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> Dict[str, Any]:
"""Prepare the chat with tools."""
@@ -672,7 +681,14 @@ def _prepare_chat_with_tools(
):
tool_dicts[-1]["cache_control"] = {"type": "ephemeral"}

return {"messages": chat_history, "tools": tool_dicts, **kwargs}
return {
"messages": chat_history,
"tools": tool_dicts,
"tool_choice": self._map_tool_choice_to_anthropic(
tool_required, allow_parallel_tool_calls
),
**kwargs,
}

def _validate_chat_with_tools_response(
self,
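The helper folds both booleans into the single `tool_choice` object that Anthropic's Messages API expects. A quick sketch of the resulting payloads, using the mapping exactly as added above (outputs shown as comments):

```python
from llama_index.llms.anthropic import Anthropic

llm = Anthropic()  # no API call is made; this only exercises the mapping

# Force a tool call, one tool call at a time:
print(llm._map_tool_choice_to_anthropic(tool_required=True, allow_parallel_tool_calls=False))
# {'disable_parallel_tool_use': True, 'type': 'any'}

# Let the model decide, parallel calls permitted:
print(llm._map_tool_choice_to_anthropic(tool_required=False, allow_parallel_tool_calls=True))
# {'disable_parallel_tool_use': False, 'type': 'auto'}
```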
@@ -27,7 +27,7 @@ dev = [

[project]
name = "llama-index-llms-anthropic"
version = "0.6.19"
version = "0.7.0"
description = "llama-index llms anthropic integration"
authors = [{name = "Your Name", email = "[email protected]"}]
requires-python = ">=3.9,<4.0"
@@ -12,7 +12,9 @@
MessageRole,
ChatResponse,
)
from llama_index.core.tools import FunctionTool
from llama_index.llms.anthropic import Anthropic
from llama_index.llms.anthropic.base import AnthropicChatResponse


def test_text_inference_embedding_class():
@@ -225,6 +227,36 @@ def pdf_url() -> str:
return "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"


@pytest.mark.skipif(
os.getenv("ANTHROPIC_API_KEY") is None,
reason="Anthropic API key not available to test Anthropic integration",
)
def test_tool_required():
llm = Anthropic(model="claude-3-5-sonnet-latest")

search_tool = FunctionTool.from_defaults(fn=search)

# Test with tool_required=True
response = llm.chat_with_tools(
user_msg="What is the weather in Paris?",
tools=[search_tool],
tool_required=True,
)
assert isinstance(response, AnthropicChatResponse)
assert response.message.additional_kwargs["tool_calls"] is not None
assert len(response.message.additional_kwargs["tool_calls"]) > 0

# Test with tool_required=False
response = llm.chat_with_tools(
user_msg="Say hello!",
tools=[search_tool],
tool_required=False,
)
assert isinstance(response, AnthropicChatResponse)
# Should not use tools for a simple greeting
assert not response.message.additional_kwargs.get("tool_calls")


@pytest.mark.skipif(
os.getenv("ANTHROPIC_API_KEY") is None,
reason="Anthropic API key not available to test Anthropic document uploading ",
@@ -244,3 +276,65 @@ def test_document_upload(tmp_path: Path, pdf_url: str) -> None:
messages = [msg]
response = llm.chat(messages)
assert isinstance(response, ChatResponse)


def test_map_tool_choice_to_anthropic():
"""Test that tool_required is correctly mapped to Anthropic's tool_choice parameter."""
llm = Anthropic()

# Test with tool_required=True
tool_choice = llm._map_tool_choice_to_anthropic(
tool_required=True, allow_parallel_tool_calls=False
)
assert tool_choice["type"] == "any"
assert tool_choice["disable_parallel_tool_use"]

# Test with tool_required=False
tool_choice = llm._map_tool_choice_to_anthropic(
tool_required=False, allow_parallel_tool_calls=False
)
assert tool_choice["type"] == "auto"
assert tool_choice["disable_parallel_tool_use"]

# Test with allow_parallel_tool_calls=True
tool_choice = llm._map_tool_choice_to_anthropic(
tool_required=True, allow_parallel_tool_calls=True
)
assert tool_choice["type"] == "any"
assert not tool_choice["disable_parallel_tool_use"]


def search(query: str) -> str:
"""Search for information about a query."""
return f"Results for {query}"


search_tool = FunctionTool.from_defaults(
fn=search, name="search_tool", description="A tool for searching information"
)


def test_prepare_chat_with_tools_tool_required():
"""Test that tool_required is correctly passed to the API request when True."""
llm = Anthropic()

# Test with tool_required=True
result = llm._prepare_chat_with_tools(tools=[search_tool], tool_required=True)

assert result["tool_choice"]["type"] == "any"
assert len(result["tools"]) == 1
assert result["tools"][0]["name"] == "search_tool"


def test_prepare_chat_with_tools_tool_not_required():
"""Test that tool_required is correctly passed to the API request when False."""
llm = Anthropic()

# Test with tool_required=False (default)
result = llm._prepare_chat_with_tools(
tools=[search_tool],
)

assert result["tool_choice"]["type"] == "auto"
assert len(result["tools"]) == 1
assert result["tools"][0]["name"] == "search_tool"
@@ -45,12 +45,17 @@

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.aio import ChatCompletionsClient as ChatCompletionsClientAsync
from azure.ai.inference.models import (
ChatCompletionsToolChoicePreset,
ChatCompletionsNamedToolChoice,
)

if TYPE_CHECKING:
from llama_index.core.tools.types import BaseTool
from llama_index.core.chat_engine.types import AgentChatResponse
from azure.core.credentials import TokenCredential


from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
from azure.ai.inference.models import (
@@ -357,6 +362,16 @@ def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
raw=response.as_dict(),
)

def _to_azure_tool_choice(
self, tool_required: bool
) -> Optional[
Union[str, ChatCompletionsToolChoicePreset, ChatCompletionsNamedToolChoice]
]:
if tool_required:
return ChatCompletionsToolChoicePreset.REQUIRED
else:
return ChatCompletionsToolChoicePreset.AUTO

@llm_completion_callback()
def complete(
self, prompt: str, formatted: bool = False, **kwargs: Any
@@ -463,6 +478,7 @@ def chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> ChatResponse:
"""Predict and call the tool."""
@@ -481,6 +497,7 @@
response = self.chat(
messages,
tools=tool_specs,
tool_choice=self._to_azure_tool_choice(tool_required),
**kwargs,
)
if not allow_parallel_tool_calls:
@@ -494,6 +511,7 @@ async def achat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> ChatResponse:
"""Predict and call the tool."""
@@ -512,6 +530,7 @@
response = await self.achat(
messages,
tools=tool_specs,
tool_choice=self._to_azure_tool_choice(tool_required),
**kwargs,
)
if not allow_parallel_tool_calls:
@@ -561,6 +580,7 @@ def _prepare_chat_with_tools(
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> Dict[str, Any]:
"""Prepare the arguments needed to let the LLM chat with tools."""
@@ -575,5 +595,6 @@
return {
"messages": chat_history,
"tools": tool_dicts or None,
"tool_choice": self._to_azure_tool_choice(tool_required),
**kwargs,
}
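Azure AI Inference expresses the same idea through `ChatCompletionsToolChoicePreset`. A small sketch of what the new `_to_azure_tool_choice` helper resolves to; the string comparisons assume the presets are string-valued enums, as in current `azure-ai-inference` releases:

```python
from azure.ai.inference.models import ChatCompletionsToolChoicePreset


def to_azure_tool_choice(tool_required: bool) -> ChatCompletionsToolChoicePreset:
    # Mirrors _to_azure_tool_choice above: REQUIRED forces some tool call,
    # AUTO lets the model decide.
    return (
        ChatCompletionsToolChoicePreset.REQUIRED
        if tool_required
        else ChatCompletionsToolChoicePreset.AUTO
    )


assert to_azure_tool_choice(True) == "required"
assert to_azure_tool_choice(False) == "auto"
```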
@@ -26,7 +26,7 @@ dev = [

[project]
name = "llama-index-llms-azure-inference"
version = "0.3.0"
version = "0.4.0"
description = "Integration for model supporting Azure AI model inference API in llama-index"
authors = [{name = "Azure AI model inference group", email = "[email protected]"}]
requires-python = ">=3.9,<4.0"