 from ..types.content import ContentBlock, Messages
 from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
 from ..types.streaming import StreamEvent
-from ..types.tools import ToolSpec
+from ..types.tools import ToolChoice, ToolSpec
+from ._validation import validate_config_keys, warn_on_tool_choice_not_supported
 from .model import Model
 
 logger = logging.getLogger(__name__)
@@ -149,12 +150,15 @@ def __init__(
                 (connect, read) timeouts.
             **model_config: Configuration options for the llama.cpp model.
         """
+        validate_config_keys(model_config, self.LlamaCppConfig)
+
         # Set default model_id if not provided
         if "model_id" not in model_config:
             model_config["model_id"] = "default"
 
         self.base_url = base_url.rstrip("/")
         self.config = dict(model_config)
+        logger.debug("config=<%s> | initializing", self.config)
 
         # Configure HTTP client
         if isinstance(timeout, tuple):
@@ -173,19 +177,14 @@ def __init__(
             timeout=timeout_obj,
         )
 
-        logger.debug(
-            "base_url=<%s>, model_id=<%s> | initializing llama.cpp provider",
-            base_url,
-            model_config.get("model_id"),
-        )
-
     @override
     def update_config(self, **model_config: Unpack[LlamaCppConfig]) -> None:  # type: ignore[override]
         """Update the llama.cpp model configuration with provided arguments.
 
         Args:
             **model_config: Configuration overrides.
         """
+        validate_config_keys(model_config, self.LlamaCppConfig)
         self.config.update(model_config)
 
     @override
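
Note on the new `validate_config_keys` call: the helper is imported from `._validation`, whose body is not part of this diff. Below is a minimal sketch of what such a check could look like, assuming it only compares the supplied keys against the fields declared on the provider's config `TypedDict` and warns about anything unexpected; the function body and warning text are illustrative, not the SDK's actual code.

```python
# Illustrative sketch only; the real strands ._validation helper may differ.
import warnings
from typing import Any, Mapping


def validate_config_keys(config_dict: Mapping[str, Any], config_class: type) -> None:
    """Warn about keys that are not declared on the provider's config TypedDict."""
    valid_keys = set(getattr(config_class, "__annotations__", {}))
    invalid_keys = set(config_dict) - valid_keys
    if invalid_keys:
        warnings.warn(
            f"Invalid configuration parameters: {sorted(invalid_keys)};"
            f" valid parameters are: {sorted(valid_keys)}.",
            stacklevel=2,
        )
```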
@@ -514,6 +513,7 @@ async def stream(
         messages: Messages,
         tool_specs: Optional[list[ToolSpec]] = None,
         system_prompt: Optional[str] = None,
+        tool_choice: ToolChoice | None = None,
         **kwargs: Any,
     ) -> AsyncGenerator[StreamEvent, None]:
         """Stream conversation with the llama.cpp model.
@@ -522,6 +522,8 @@ async def stream(
             messages: List of message objects to be processed by the model.
             tool_specs: List of tool specifications to make available to the model.
             system_prompt: System prompt to provide context to the model.
+            tool_choice: Selection strategy for tool invocation. **Note: This parameter is accepted for
+                interface consistency but is currently ignored for this model provider.**
             **kwargs: Additional keyword arguments for future extensibility.
 
         Yields:
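
The new `tool_choice` parameter exists only for interface parity with other providers; per the docstring it is ignored here, and the `warn_on_tool_choice_not_supported` call added below surfaces that. A hedged usage sketch follows; the message shape, the `{"auto": {}}` tool-choice value, the module path, and the local server URL are assumptions based on the strands `Messages`/`ToolChoice` types as I understand them.

```python
# Usage sketch under the assumptions stated above.
import asyncio

from strands.models.llamacpp import LlamaCppModel


async def main() -> None:
    model = LlamaCppModel(base_url="http://localhost:8080")
    messages = [{"role": "user", "content": [{"text": "Hello!"}]}]

    # Passing tool_choice triggers a "not supported" warning for this provider
    # and is otherwise ignored; the request sent to llama.cpp is unchanged.
    async for event in model.stream(messages, tool_choice={"auto": {}}):
        print(event)


asyncio.run(main())
```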
@@ -531,19 +533,21 @@ async def stream(
             ContextWindowOverflowException: When the context window is exceeded.
             ModelThrottledException: When the llama.cpp server is overloaded.
         """
+        warn_on_tool_choice_not_supported(tool_choice)
+
         # Track request start time for latency calculation
         start_time = time.perf_counter()
 
         try:
-            logger.debug("formatting request for llama.cpp server")
+            logger.debug("formatting request")
             request = self._format_request(messages, tool_specs, system_prompt)
             logger.debug("request=<%s>", request)
 
-            logger.debug("sending request to llama.cpp server")
+            logger.debug("invoking model")
             response = await self.client.post("/v1/chat/completions", json=request)
             response.raise_for_status()
 
-            logger.debug("processing streaming response")
+            logger.debug("got response from model")
             yield self._format_chunk({"chunk_type": "message_start"})
             yield self._format_chunk({"chunk_type": "content_start", "data_type": "text"})
 
@@ -648,12 +652,10 @@ async def stream(
             yield self._format_chunk({"chunk_type": "content_stop"})
 
             # Send stop reason
-            logger.debug("finish_reason=%s, tool_calls=%s", finish_reason, bool(tool_calls))
             if finish_reason == "tool_calls" or tool_calls:
                 stop_reason = "tool_calls"  # Changed from "tool_use" to match format_chunk expectations
             else:
                 stop_reason = finish_reason or "end_turn"
-            logger.debug("stop_reason=%s", stop_reason)
             yield self._format_chunk({"chunk_type": "message_stop", "data": stop_reason})
 
             # Send usage metadata if available
@@ -676,7 +678,7 @@ async def stream(
                     }
                 )
 
-            logger.debug("finished streaming response")
+            logger.debug("finished streaming response from model")
 
         except httpx.HTTPStatusError as e:
             if e.response.status_code == 400:
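
Likewise, `warn_on_tool_choice_not_supported` is imported above but not defined in this diff. One plausible shape, consistent with how it is called in `stream`, is sketched below; the actual helper may log instead of warn or use different wording, so treat this as illustrative only.

```python
# Illustrative sketch only; the real strands ._validation helper may differ.
import warnings
from typing import Any, Optional


def warn_on_tool_choice_not_supported(tool_choice: Optional[Any]) -> None:
    """Warn when a caller passes tool_choice to a provider that cannot honor it."""
    if tool_choice is not None:
        warnings.warn(
            "tool_choice was provided but is not supported by this provider and will be ignored.",
            stacklevel=2,
        )
```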