From cb86e12942a4cb6e8fa8b9df5302ec000c56d13d Mon Sep 17 00:00:00 2001 From: "ScrewTSW (public-projects)" Date: Thu, 21 May 2026 03:39:16 +0200 Subject: [PATCH] server: expose prompt token counts in /slots endpoint Add n_prompt_tokens, n_prompt_tokens_processed, and n_prompt_tokens_cache to the /slots JSON response. These fields are already tracked internally but were not exposed, making it impossible for clients to monitor prompt evaluation progress during processing. Co-Authored-By: Claude Opus 4.6 (1M context) --- tools/server/server-context.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index f517310266c0..f40968b7e035 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -506,6 +506,9 @@ struct server_slot { if (ptask) { res["id_task"] = ptask->id; + res["n_prompt_tokens"] = (int32_t) prompt.tokens.size(); + res["n_prompt_tokens_processed"] = n_prompt_tokens_processed; + res["n_prompt_tokens_cache"] = n_prompt_tokens_cache; res["params"] = ptask->params.to_json(only_metrics); res["next_token"] = { {