Enable LLM function calls (#643)

iSevenDays · web-flow · commit 4e9c78c03960 · 2025-07-24T20:24:12.000+02:00
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -1683,6 +1683,7 @@ struct server_context {
         res.stop     = true;
         res.data     = json {
             {"content",             !slot.params.stream ? slot.generated_text : ""},
+            {"generated_text",      slot.generated_text},  // Always include full text for finish_reason logic
             {"id_slot",             slot.id},
             {"stop",                true},
             {"model",               params.model_alias},
@@ -2822,11 +2823,22 @@ static std::vector<json> format_partial_response_oaicompat(server_task_result ta
     std::string content = json_value(result, "content", std::string(""));
 
     std::string finish_reason;
-    if (stopped_word || stopped_eos) {
-        finish_reason = "stop";
-    }
     if (stopped_limit) {
         finish_reason = "length";
+    } else if (stopped_word || stopped_eos) {
+        // Following original llama.cpp pattern: finish_reason = oaicompat_msg.tool_calls.empty() ? "stop" : "tool_calls"
+        // Use generated_text (complete content) for finish_reason logic, not content (empty in streaming)
+        std::string generated_text = json_value(result, "generated_text", std::string(""));
+        ik_chat_msg final_msg = parse_chat_message_incremental(generated_text, false, modelname);
+        
+        // Debug logging
+        LOG_INFO("DEBUG: Streaming finish_reason check", {
+            {"generated_text", generated_text},
+            {"model_name", modelname}, 
+            {"tool_calls_count", final_msg.tool_calls.size()}
+        });
+        
+        finish_reason = final_msg.tool_calls.empty() ? "stop" : "tool_calls";
     }
 
     std::time_t t = std::time(0);
diff --git a/tests/test-function-calls.cpp b/tests/test-function-calls.cpp
@@ -2992,6 +2992,34 @@ int main() {
         assert(extracted.find("<｜tool▁calls▁begin｜>") == std::string::npos);
         std::cout << "✅ PASS: DeepSeek R1 content extraction works" << std::endl;
         
+        // Test streaming finish_reason logic (core of the fix)
+        std::cout << "\n🎯 Testing Streaming finish_reason Logic:" << std::endl;
+        
+        // Test Case 1: Content with tool calls should lead to finish_reason="tool_calls"
+        std::string tool_call_content = "functions.get_weather:0{\"location\": \"Tokyo\"}";
+        ik_chat_msg msg_with_tools = parse_chat_message_incremental(tool_call_content, false, "kimi-k2");
+        bool should_be_tool_calls = !msg_with_tools.tool_calls.empty();
+        std::string finish_reason_with_tools = should_be_tool_calls ? "tool_calls" : "stop";
+        assert(finish_reason_with_tools == "tool_calls");
+        std::cout << "✅ PASS: Content with tool calls -> finish_reason='tool_calls'" << std::endl;
+        
+        // Test Case 2: Content without tool calls should lead to finish_reason="stop"
+        std::string regular_content = "This is just regular text without any tool calls.";
+        ik_chat_msg msg_without_tools = parse_chat_message_incremental(regular_content, false, "kimi-k2");
+        bool should_be_stop = msg_without_tools.tool_calls.empty();
+        std::string finish_reason_without_tools = should_be_stop ? "stop" : "tool_calls";
+        assert(finish_reason_without_tools == "stop");
+        std::cout << "✅ PASS: Content without tool calls -> finish_reason='stop'" << std::endl;
+        
+        // Test Case 3: Qwen3 XML format tool calls
+        std::string qwen3_content = "<tool_call>\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Tokyo\"}}\n</tool_call>";
+        ik_chat_msg qwen3_msg = parse_chat_message_incremental(qwen3_content, false, "qwen3-7b");
+        bool qwen3_should_be_tool_calls = !qwen3_msg.tool_calls.empty();
+        std::string qwen3_finish_reason = qwen3_should_be_tool_calls ? "tool_calls" : "stop";
+        assert(qwen3_finish_reason == "tool_calls");
+        std::cout << "✅ PASS: Qwen3 XML tool calls -> finish_reason='tool_calls'" << std::endl;
+        
+        std::cout << "🎯 All streaming finish_reason tests passed!" << std::endl;
     } catch (const std::exception& e) {
         std::cout << std::endl;
         std::cout << "❌ Test failed with exception: " << e.what() << std::endl;