2 files changed: +8 -6 lines changed
Original file line number | Diff line number | Diff line change
@@ -702,6 +702,8 @@ int llama_context::encode(llama_batch & inp_batch) {
702702 t_compute_start_us = ggml_time_us ();
703703 }
704704
705+ embd_seq.clear ();
706+
705707 n_queued_tokens += n_tokens;
706708
707709 const int64_t n_embd = hparams.n_embd ;
@@ -842,13 +844,13 @@ int llama_context::encode(llama_batch & inp_batch) {
842844}
843845
844846int llama_context::decode (llama_batch & inp_batch) {
845- if (inp_batch.n_tokens == 0 ) {
846- LLAMA_LOG_ERROR (" %s: n_tokens == 0\n " , __func__);
847- return -1 ;
848- }
849-
850847 if (!memory) {
851848 LLAMA_LOG_WARN (" %s: cannot decode batches with this context\n " , __func__);
849+ return encode (inp_batch);
850+ }
851+
852+ if (inp_batch.n_tokens == 0 ) {
853+ LLAMA_LOG_ERROR (" %s: n_tokens == 0\n " , __func__);
852854 return -1 ;
853855 }
854856
Original file line number | Diff line number | Diff line change
@@ -3941,7 +3941,7 @@ int main(int argc, char ** argv) {
39413941 const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok](
39423942 server_task_type type,
39433943 json & data,
3944- std::function<bool ()> is_connection_closed,
3944+ const std::function<bool ()> & is_connection_closed,
39453945 httplib::Response & res,
39463946 oaicompat_type oaicompat) {
39473947 GGML_ASSERT (type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL);
You can’t perform that action at this time.
0 commit comments