Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
fb7d01f
Implement function calling / tools for ik_llama.cpp for Kimi K2
iSevenDays Jul 17, 2025
7f54f55
Implement basic tool choice
iSevenDays Jul 17, 2025
e9e7fe6
Backport llama.cpp tool calls support
iSevenDays Jul 20, 2025
2d46776
Enhance function calls with improved chat parser and string utilities
iSevenDays Jul 21, 2025
b88626d
Enhance function calling with unified streaming and parser improvements
iSevenDays Jul 22, 2025
3513db9
Replace hardcoded values in kimi_k2_parser.hpp with named constants
iSevenDays Jul 22, 2025
d230096
Fix duplicate common_chat_parse definition
iSevenDays Jul 22, 2025
3eff579
Fix JSON assertion failure in function call parsing
iSevenDays Jul 22, 2025
cd0392f
Merge branch 'ikawrakow:main' into function_calling
iSevenDays Jul 23, 2025
3fd9758
Add comprehensive Qwen3 XML tool calling support with unit tests
iSevenDays Jul 23, 2025
de31581
Add DeepSeek R1 function calling support with comprehensive unit tests
iSevenDays Jul 23, 2025
0272064
Add partial parsing support for JSON and regex
iSevenDays Jul 23, 2025
f38a524
Add format_chat integration tests for Qwen3 tool injection
iSevenDays Jul 23, 2025
ff6be37
Fix Qwen3 tool call parsing - pass model name to parser
iSevenDays Jul 23, 2025
8726ae5
Fix non-streaming path to use model-specific parsing
iSevenDays Jul 23, 2025
aff9de3
Update Qwen3 function call handling in server and tests
iSevenDays Jul 24, 2025
501bbe9
Merge origin/main into qwen3-function-calls branch
iSevenDays Jul 24, 2025
d42de28
Add DeepSeek-R1 function call parsing support
iSevenDays Jul 25, 2025
a493e82
Update function_calls.md documentation for DeepSeek-R1 Format 4
iSevenDays Jul 26, 2025
a80811d
Merge origin/main with DeepSeek-R1 implementation
iSevenDays Jul 26, 2025
343304a
Fix merge conflict in test-function-calls.cpp
iSevenDays Jul 26, 2025
80c4873
Fix DeepSeek R1 parsing issue with responses wrapped in think tags
iSevenDays Aug 7, 2025
7403fa5
Implement proper reasoning handling following original llama.cpp patt…
iSevenDays Aug 8, 2025
4f9cbda
TDD SUCCESS: Fix DeepSeek R1 thinking tag termination issue
iSevenDays Aug 8, 2025
31661a1
Complete server integration fix for DeepSeek R1 thinking tag termination
iSevenDays Aug 8, 2025
a1b3d4e
Add TDD test case for DeepSeek R1 thinking tag termination issue
iSevenDays Aug 8, 2025
094a85c
Add remaining TDD test changes for DeepSeek R1 thinking tag fix
iSevenDays Aug 8, 2025
c41d8a1
Merge upstream main - resolve conflicts in chat.cpp and test file
iSevenDays Aug 8, 2025
9091590
Add debug output after upstream merge
iSevenDays Aug 8, 2025
bda7454
Remove temporary benchmark and debug files
iSevenDays Aug 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 28 additions & 18 deletions common/chat-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,28 +82,38 @@ bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
}

// Tries to parse a reasoning ("thinking") section delimited by start_think / end_think
// and returns whether such a section was handled.
// NOTE(review): this hunk is rendered without +/- markers, so lines of the previous
// find()/substr()-based implementation and of the handle_reasoning-based implementation
// appear interleaved below and the braces do not balance as shown — confirm against the
// real diff which lines survive before relying on the flow described here.
bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
auto start_pos = input_.find(start_think, pos_);
if (start_pos == std::string::npos) {
return false;
}
// Routes one chunk of reasoning text to the message being built. The text is stripped
// and ignored when nothing remains. With syntax_.reasoning_in_content set, it is written
// through add_content wrapped in tags (the literal "<think>" / "</think>" for
// COMMON_REASONING_FORMAT_DEEPSEEK, otherwise the caller's start_think / end_think), and
// the closing tag is written only when `closed` is true. Otherwise the text goes to
// add_reasoning_content.
auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
auto stripped_reasoning = string_strip(reasoning);
if (stripped_reasoning.empty()) {
return;
}
if (syntax_.reasoning_in_content) {
add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
add_content(stripped_reasoning);
if (closed) {
add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
}
} else {
add_reasoning_content(stripped_reasoning);
}
};

auto end_pos = input_.find(end_think, start_pos + start_think.size());
if (end_pos == std::string::npos) {
if (is_partial_) {
// Partial reasoning content
auto reasoning = input_.substr(start_pos + start_think.size());
add_reasoning_content(string_strip(reasoning));
pos_ = input_.size();
// Reasoning is only parsed when a reasoning format other than NONE is configured.
if (syntax_.reasoning_format != COMMON_REASONING_FORMAT_NONE) {
// The reasoning branch is entered either because thinking_forced_open is set (the
// || short-circuits, so no start tag is consumed in that case) or because the start
// tag was consumed here.
if (syntax_.thinking_forced_open || try_consume_literal(start_think)) {
// End tag found: res->prelude (presumably the text preceding the tag — verify against
// try_find_literal) is passed on as closed reasoning, then spaces are consumed.
if (auto res = try_find_literal(end_think)) {
handle_reasoning(res->prelude, /* closed */ true);
consume_spaces();
return true;
}
// No end tag: everything left is passed on as reasoning, marked closed only when
// is_partial() is false.
auto rest = consume_rest();
if (!rest.empty()) {
handle_reasoning(rest, /* closed */ !is_partial());
}
// Allow unclosed thinking tags for now (following original llama.cpp)
return true;
}
return false;
}

// Extract reasoning content
auto reasoning = input_.substr(start_pos + start_think.size(), end_pos - start_pos - start_think.size());
add_reasoning_content(string_strip(reasoning));
pos_ = end_pos + end_think.size();
return true;
// Fallthrough result when no reasoning section was handled.
return false;
}

std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_literal_legacy(const std::string & literal) {
Expand Down
3 changes: 3 additions & 0 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
throw; // Re-throw for partial mode
}
}

// Add any remaining content (critical for responses without tool calls)
builder.add_content(builder.consume_rest());
}

// Parse DeepSeek R1 tools array format following original llama.cpp parse_prefixed_json_tool_call_array pattern
Expand Down
10 changes: 10 additions & 0 deletions common/chat.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,18 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_KIMI_K2, // Our custom format (keep last for backward compatibility)
};

// Selects how reasoning ("thinking") sections of model output are treated by the chat parser.
// Enumerator values are spelled out; they match the implicit numbering (0, 1, 2).
enum common_reasoning_format {
    // No reasoning handling: try_parse_reasoning returns false when this format is set.
    COMMON_REASONING_FORMAT_NONE            = 0,
    // When reasoning is inlined into content, the literal "<think>" / "</think>" tags are
    // emitted instead of the caller-supplied start/end markers.
    COMMON_REASONING_FORMAT_DEEPSEEK        = 1,
    // NOTE(review): not referenced in the code visible here; presumably an older
    // DeepSeek-style variant — confirm against its users.
    COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY = 2,
};

// Options controlling how the chat message parser interprets model output.
struct common_chat_syntax {
    // Chat/tool-call format to parse; defaults to the Kimi K2 format.
    common_chat_format      format{COMMON_CHAT_FORMAT_KIMI_K2};
    // Reasoning handling mode; with the default NONE, try_parse_reasoning returns false.
    common_reasoning_format reasoning_format{COMMON_REASONING_FORMAT_NONE};
    // When true, reasoning text is written into the regular content (wrapped in think tags)
    // instead of reasoning_content, e.g. for reasoning_format=deepseek in stream mode.
    bool reasoning_in_content{false};
    // When true, try_parse_reasoning enters the reasoning branch without first consuming
    // a start tag.
    bool thinking_forced_open{false};
    // NOTE(review): usage not visible in this view — confirm what this toggles.
    bool enable_thinking{false};
    // NOTE(review): usage not visible in this view; presumably gates tool-call parsing — confirm.
    bool enable_tool_calls{true};
};
Expand Down
2 changes: 2 additions & 0 deletions examples/server/function_calls.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ static ik_chat_msg parse_chat_message_incremental(const std::string& content, bo
try {
common_chat_syntax syntax;
syntax.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
syntax.reasoning_in_content = true; // Fix for thinking tag termination issue
syntax.enable_tool_calls = true;

common_chat_msg_parser parser(content, is_partial, syntax);
Expand Down
59 changes: 58 additions & 1 deletion tests/test-function-calls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3298,11 +3298,68 @@ int main() {
std::cout << "✅ PASS: Qwen3 XML tool calls -> finish_reason='tool_calls'" << std::endl;

std::cout << "🎯 All streaming finish_reason tests passed!" << std::endl;

// TDD: Test for thinking tag termination issue - Reproduce user's exact complaint
std::cout << std::endl;
std::cout << "🧠 Testing DeepSeek R1 thinking tag termination issue..." << std::endl;

// Test case: Response wrapped entirely in think tags (reported issue)
std::string wrapped_response = "<think>This should be content but is wrapped in think tags</think>";

std::cout << "\n 1. REPRODUCING FAILURE - Without fix (reasoning_in_content=false):" << std::endl;

// First reproduce the failing behavior that user reported
common_chat_syntax broken_syntax;
broken_syntax.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
broken_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
broken_syntax.reasoning_in_content = false; // This causes the reported issue
broken_syntax.enable_tool_calls = false;

try {
auto broken_msg = common_chat_parse(wrapped_response, false, broken_syntax);
std::cout << " Content: '" << broken_msg.content << "'" << std::endl;
std::cout << " Reasoning: '" << broken_msg.reasoning_content << "'" << std::endl;

if (broken_msg.content.empty() && !broken_msg.reasoning_content.empty()) {
std::cout << " ❌ REPRODUCED USER BUG: Content disappears (thinking tags don't terminate properly)" << std::endl;
std::cout << " User sees: EMPTY CONTENT - this is exactly what was reported!" << std::endl;
}
} catch (const std::exception& e) {
std::cout << " ❌ Exception: " << e.what() << std::endl;
}

std::cout << "\n 2. DEMONSTRATING FIX - With fix (reasoning_in_content=true):" << std::endl;

// Now show the fix works
common_chat_syntax fixed_syntax;
fixed_syntax.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
fixed_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
fixed_syntax.reasoning_in_content = true; // Key fix: display thinking as content
fixed_syntax.enable_tool_calls = false;

try {
auto msg = common_chat_parse(wrapped_response, false, fixed_syntax);
std::cout << " Content: '" << msg.content << "'" << std::endl;
std::cout << " Reasoning: '" << msg.reasoning_content << "'" << std::endl;

if (msg.content.find("This should be content but is wrapped in think tags") != std::string::npos) {
std::cout << " ✅ PASS: Content properly preserved from think tags (with reasoning_in_content=true)" << std::endl;
std::cout << " User sees: Full content - this fixes the reported issue!" << std::endl;
} else if (msg.content.empty() && !msg.reasoning_content.empty()) {
std::cout << " ❌ FAILING TEST: Entire response treated as reasoning instead of content!" << std::endl;
std::cout << " Expected: Content should contain the text from within think tags" << std::endl;
} else {
std::cout << " ⚠️ PARTIAL: Some content found but may not contain expected text" << std::endl;
}
} catch (const std::exception& e) {
std::cout << " ❌ Exception in thinking tag test: " << e.what() << std::endl;
}

} catch (const std::exception& e) {
std::cout << std::endl;
std::cout << "❌ Test failed with exception: " << e.what() << std::endl;
return 1;
}

return 0;
}
}