From dc2e8928c18db3004700794e50a31ac4504b4a31 Mon Sep 17 00:00:00 2001 From: Taylor Date: Wed, 11 Jun 2025 04:19:38 -0400 Subject: [PATCH] Pass --keep to llama-server --- tools/server/server.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 3b5e03528e2d7..1b1cf439baa5f 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -233,6 +233,7 @@ struct server_task { slot_params defaults; defaults.sampling = params_base.sampling; defaults.speculative = params_base.speculative; + defaults.n_keep = params_base.n_keep; // enabling this will output extra debug information in the HTTP responses from the server params.verbose = params_base.verbosity > 9; @@ -2060,6 +2061,7 @@ struct server_context { SLT_INF(slot, "new slot n_ctx_slot = %d\n", slot.n_ctx); slot.params.sampling = params_base.sampling; + slot.params.n_keep = params_base.n_keep; slot.callback_on_release = [this](int) { queue_tasks.pop_deferred_task();