Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,7 @@ struct server_context {
res.stop = true;
res.data = json {
{"content", !slot.params.stream ? slot.generated_text : ""},
{"generated_text", slot.generated_text}, // Always include full text for finish_reason logic
{"id_slot", slot.id},
{"stop", true},
{"model", params.model_alias},
Expand Down Expand Up @@ -2822,11 +2823,22 @@ static std::vector<json> format_partial_response_oaicompat(server_task_result ta
std::string content = json_value(result, "content", std::string(""));

// Derive the OpenAI-compatible finish_reason from the slot's stop flags:
//   - stopped_limit               -> "length"
//   - stopped_word / stopped_eos  -> "tool_calls" if the complete generated text
//                                    parses to at least one tool call, else "stop"
//   - no stop flag set            -> finish_reason stays empty
// An earlier unconditional `if (stopped_word || stopped_eos)` assignment of "stop"
// was a dead store (every path that ran it overwrote the value below) and is gone.
std::string finish_reason;
if (stopped_limit) {
    finish_reason = "length";
} else if (stopped_word || stopped_eos) {
    // Use "generated_text" (the complete output) rather than "content", which is
    // empty in streaming mode, so tool calls can be detected on the final chunk.
    const std::string generated_text = json_value(result, "generated_text", std::string(""));
    const ik_chat_msg final_msg = parse_chat_message_incremental(generated_text, false, modelname);

    // Log only the size of the generated text: dumping the full model output at
    // info level on every finished stream is noisy and exposes response content.
    LOG_INFO("Streaming finish_reason check", {
        {"generated_text_size", generated_text.size()},
        {"model_name", modelname},
        {"tool_calls_count", final_msg.tool_calls.size()}
    });

    finish_reason = final_msg.tool_calls.empty() ? "stop" : "tool_calls";
}

std::time_t t = std::time(0);
Expand Down
28 changes: 28 additions & 0 deletions tests/test-function-calls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2992,6 +2992,34 @@ int main() {
assert(extracted.find("<|tool▁calls▁begin|>") == std::string::npos);
std::cout << "✅ PASS: DeepSeek R1 content extraction works" << std::endl;

// Test streaming finish_reason logic (core of the fix)
std::cout << "\n🎯 Testing Streaming finish_reason Logic:" << std::endl;

// Test Case 1: Content with tool calls should lead to finish_reason="tool_calls"
std::string tool_call_content = "functions.get_weather:0{\"location\": \"Tokyo\"}";
ik_chat_msg msg_with_tools = parse_chat_message_incremental(tool_call_content, false, "kimi-k2");
bool should_be_tool_calls = !msg_with_tools.tool_calls.empty();
std::string finish_reason_with_tools = should_be_tool_calls ? "tool_calls" : "stop";
assert(finish_reason_with_tools == "tool_calls");
std::cout << "✅ PASS: Content with tool calls -> finish_reason='tool_calls'" << std::endl;

// Test Case 2: Content without tool calls should lead to finish_reason="stop"
std::string regular_content = "This is just regular text without any tool calls.";
ik_chat_msg msg_without_tools = parse_chat_message_incremental(regular_content, false, "kimi-k2");
bool should_be_stop = msg_without_tools.tool_calls.empty();
std::string finish_reason_without_tools = should_be_stop ? "stop" : "tool_calls";
assert(finish_reason_without_tools == "stop");
std::cout << "✅ PASS: Content without tool calls -> finish_reason='stop'" << std::endl;

// Test Case 3: Qwen3 XML format tool calls
std::string qwen3_content = "<tool_call>\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Tokyo\"}}\n</tool_call>";
ik_chat_msg qwen3_msg = parse_chat_message_incremental(qwen3_content, false, "qwen3-7b");
bool qwen3_should_be_tool_calls = !qwen3_msg.tool_calls.empty();
std::string qwen3_finish_reason = qwen3_should_be_tool_calls ? "tool_calls" : "stop";
assert(qwen3_finish_reason == "tool_calls");
std::cout << "✅ PASS: Qwen3 XML tool calls -> finish_reason='tool_calls'" << std::endl;

std::cout << "🎯 All streaming finish_reason tests passed!" << std::endl;
} catch (const std::exception& e) {
std::cout << std::endl;
std::cout << "❌ Test failed with exception: " << e.what() << std::endl;
Expand Down