Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion tools/server/server-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2781,9 +2781,10 @@ struct server_context_impl {
}

llama_pos pos_next = slot.prompt.tokens.pos_next(n_past);
+ const bool has_new_tokens = (n_past < slot.task->n_tokens());
Comment thread
ggerganov marked this conversation as resolved.

// the largest pos_min required for a checkpoint to be useful
- const auto pos_min_thold = std::max(0, pos_next - n_swa - 1);
+ const auto pos_min_thold = std::max(0, pos_next - n_swa - (has_new_tokens ? 0 : 1));

if (n_past > 0 && n_past <= slot.prompt.n_tokens()) {
const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx_tgt), slot.id);
Expand Down