From 517ba1b1b7c37c7ee2d88519d28297dad1d58957 Mon Sep 17 00:00:00 2001
From: Anton Sokolchenko
Date: Sat, 26 Jul 2025 16:43:50 +0200
Subject: [PATCH] Fix text generation endpoint

---
 examples/server/server.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 42f0b17bd..399d45f89 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1615,10 +1615,10 @@ struct server_context {
         std::vector<common_chat_msg_diff> oaicompat_msg_diffs;
         slot.update_chat_msg(oaicompat_msg_diffs);
 
-        // Following original llama.cpp pattern: send empty content in streaming mode
-        // Clean content comes through oaicompat_msg_diffs instead of raw tokens
+        // For text completion endpoints, send actual content; for chat completion, use diffs
+        // OpenAI-compatible chat endpoints use empty content with diffs for tool calls
         res.data = json {
-            {"content", ""}, // Empty - clean content provided via diffs
+            {"content", slot.oaicompat ? "" : tkn.text_to_send}, // Text completion needs actual content
             {"stop",       false},
             {"id_slot",    slot.id},
             {"multimodal", false}
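
Note: a minimal sketch of the content-selection logic this hunk introduces. The `Slot` and `Token` structs below are hypothetical stand-ins for server.cpp's `server_slot` and `completion_token_output`, modeling only the two fields the patch touches; this is not the real server code.

```cpp
#include <string>

// Hypothetical stand-ins for the real server.cpp types (assumption,
// not the actual server_slot / completion_token_output definitions).
struct Slot  { bool        oaicompat;    }; // true for OpenAI-compatible chat requests
struct Token { std::string text_to_send; }; // raw token text for this streaming step

// Text completion clients expect the token text in the "content" field;
// OpenAI-compatible chat clients get "" here, since the cleaned message
// is streamed incrementally through oaicompat_msg_diffs instead.
std::string content_field(const Slot & slot, const Token & tkn) {
    return slot.oaicompat ? std::string() : tkn.text_to_send;
}
```

Before this change, text completion streaming always sent `""`, so only chat clients consuming the diffs saw any output; gating on `slot.oaicompat` restores the token text for plain text-completion responses without disturbing the diff-based chat path.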