1 file changed: +4 / -6 lines.
```diff
@@ -179,8 +179,8 @@ llama_context::llama_context(
     // init the memory module
     if (!hparams.vocab_only) {
         llama_memory_params params_mem = {
-            /*.type_k =*/ params.type_k,
-            /*.type_v =*/ params.type_v,
+            /*.type_k =*/ params.type_k,
+            /*.type_v =*/ params.type_v,
         };

         memory.reset(model.create_memory(cparams, params_mem));
```
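The `type_k` / `type_v` fields touched by this whitespace-only hunk originate from the public `llama_context_params` and are simply forwarded into the memory module via `llama_memory_params`. Below is a minimal caller-side sketch of where those values come from, assuming a model has already been loaded; the Q8_0 cache types and the helper name are illustrative, not part of this change:

```cpp
#include "llama.h"

// Sketch (assumption): create a context whose K/V cache tensors use Q8_0.
// cparams.type_k / cparams.type_v are the values that end up in params_mem above.
static llama_context * make_ctx_with_q8_kv(llama_model * model) {
    llama_context_params cparams = llama_context_default_params();
    cparams.type_k = GGML_TYPE_Q8_0; // illustrative choice of K cache type
    cparams.type_v = GGML_TYPE_Q8_0; // illustrative choice of V cache type
    return llama_init_from_model(model, cparams);
}
```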
```diff
@@ -1008,11 +1008,9 @@ int llama_context::encode(llama_batch & inp_batch) {
         return -1;
     }

-    llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
-
     // temporary allocate memory for the input batch if needed
-    // TODO: this is incorrect for multiple sequences because get_pos_max() is the maximum across all sequences
-    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : kv_self->get_pos_max() + 1);
+    // note: during encode, we always pass the full sequence starting from pos = 0
+    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : 0);

     const llama_batch & batch = batch_allocr.batch;
     const int32_t n_tokens = batch.n_tokens;
```
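This second hunk changes how default positions are assigned during `encode`: when the caller supplies no explicit positions (`inp_batch.pos == nullptr`), the batch allocator now numbers tokens from 0 instead of continuing after the KV cache's maximum position, since an encoder always receives the full sequence in one call; the `-1` is passed when positions are already provided, so those are left as-is. A hedged caller-side sketch of the case this affects, assuming an encoder/embedding model is already loaded into `ctx`; `encode_tokens` is a hypothetical helper and error handling is omitted:

```cpp
#include <vector>
#include "llama.h"

// Sketch: submit a whole sequence with implicit positions.
// llama_batch_get_one() leaves batch.pos == nullptr, so with this change the
// allocator assigns positions 0..n_tokens-1 instead of kv_self->get_pos_max() + 1.
static int encode_tokens(llama_context * ctx, std::vector<llama_token> & tokens) {
    llama_batch batch = llama_batch_get_one(tokens.data(), (int32_t) tokens.size());
    return llama_encode(ctx, batch); // returns 0 on success
}
```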