File tree Expand file tree Collapse file tree 1 file changed +5
-3
lines changed Expand file tree Collapse file tree 1 file changed +5
-3
lines changed Original file line number Diff line number Diff line change @@ -504,9 +504,11 @@ struct llama_server_context
504504 });
505505 }
506506
507+ bool tg = true ;
507508 while (n_past < embd.size ())
508509 {
509510 int n_eval = (int )embd.size () - n_past;
511+ tg = n_eval == 1 ;
510512 if (n_eval > params.n_batch )
511513 {
512514 n_eval = params.n_batch ;
@@ -633,7 +635,9 @@ struct llama_server_context
633635
634636 last_n_tokens.erase (last_n_tokens.begin ());
635637 last_n_tokens.push_back (result.tok );
636- num_tokens_predicted++;
638+ if (tg) {
639+ num_tokens_predicted++;
640+ }
637641 }
638642
639643 // add it to the context
@@ -1124,8 +1128,6 @@ static json format_timings(llama_server_context &llama)
11241128{
11251129 const auto timings = llama_get_timings (llama.ctx );
11261130
1127- assert (timings.n_eval == ptrdiff_t (llama.num_tokens_predicted ));
1128-
11291131 return json{
11301132 {" prompt_n" , timings.n_p_eval },
11311133 {" prompt_ms" , timings.t_p_eval_ms },
You can’t perform that action at this time.
0 commit comments