@@ -214,8 +214,10 @@ void llama_kv_cache_unified::clear(bool data) {
214214}
215215
216216bool  llama_kv_cache_unified::seq_rm (llama_seq_id seq_id, llama_pos p0, llama_pos p1) {
217-     auto  & cells = v_cells[seq_to_stream.at (seq_id)];
218-     auto  & head  = v_heads[seq_to_stream.at (seq_id)];
217+     GGML_ASSERT (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ());
218+ 
219+     auto  & cells = v_cells[seq_to_stream[seq_id]];
220+     auto  & head  = v_heads[seq_to_stream[seq_id]];
219221
220222    uint32_t  new_head = cells.size ();
221223
@@ -263,8 +265,11 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
263265}
264266
265267void  llama_kv_cache_unified::seq_cp (llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) {
266-     const  auto  s0 = seq_to_stream.at (seq_id_src);
267-     const  auto  s1 = seq_to_stream.at (seq_id_dst);
268+     GGML_ASSERT (seq_id_src >= 0  && (size_t ) seq_id_src < seq_to_stream.size ());
269+     GGML_ASSERT (seq_id_dst >= 0  && (size_t ) seq_id_dst < seq_to_stream.size ());
270+ 
271+     const  auto  s0 = seq_to_stream[seq_id_src];
272+     const  auto  s1 = seq_to_stream[seq_id_dst];
268273
269274    if  (s0 == s1) {
270275        //  since both sequences are in the same stream, no data copy is necessary
@@ -343,8 +348,10 @@ void llama_kv_cache_unified::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id
343348}
344349
345350void  llama_kv_cache_unified::seq_keep (llama_seq_id seq_id) {
346-     auto  & cells = v_cells[seq_to_stream.at (seq_id)];
347-     auto  & head  = v_heads[seq_to_stream.at (seq_id)];
351+     GGML_ASSERT (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ());
352+ 
353+     auto  & cells = v_cells[seq_to_stream[seq_id]];
354+     auto  & head  = v_heads[seq_to_stream[seq_id]];
348355
349356    uint32_t  new_head = cells.size ();
350357
@@ -363,8 +370,10 @@ void llama_kv_cache_unified::seq_keep(llama_seq_id seq_id) {
363370}
364371
365372void  llama_kv_cache_unified::seq_add (llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos shift) {
366-     auto  & cells = v_cells[seq_to_stream.at (seq_id)];
367-     auto  & head  = v_heads[seq_to_stream.at (seq_id)];
373+     GGML_ASSERT (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ());
374+ 
375+     auto  & cells = v_cells[seq_to_stream[seq_id]];
376+     auto  & head  = v_heads[seq_to_stream[seq_id]];
368377
369378    if  (shift == 0 ) {
370379        return ;
@@ -405,7 +414,9 @@ void llama_kv_cache_unified::seq_add(llama_seq_id seq_id, llama_pos p0, llama_po
405414}
406415
407416void  llama_kv_cache_unified::seq_div (llama_seq_id seq_id, llama_pos p0, llama_pos p1, int  d) {
408-     auto  & cells = v_cells[seq_to_stream.at (seq_id)];
417+     GGML_ASSERT (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ());
418+ 
419+     auto  & cells = v_cells[seq_to_stream[seq_id]];
409420
410421    if  (d == 1 ) {
411422        return ;
@@ -436,13 +447,17 @@ void llama_kv_cache_unified::seq_div(llama_seq_id seq_id, llama_pos p0, llama_po
436447}
437448
// seq_pos_min: looks up the cell set that seq_to_stream maps seq_id to (an index into v_cells)
// and returns that cell set's seq_pos_min(seq_id).
//
// Reading this diff: the "-" line is the removed code and the "+" lines are its replacement.
// The change drops seq_to_stream.at(seq_id) in favour of an explicit GGML_ASSERT on the range
// of seq_id followed by plain operator[] indexing.
// NOTE(review): presumably seq_to_stream is a std::vector, so .at() used to throw on a bad
// index while operator[] does no checking of its own -- confirm against the declaration.
438449llama_pos llama_kv_cache_unified::seq_pos_min (llama_seq_id seq_id) const  {
439-     const  auto  & cells = v_cells[seq_to_stream.at (seq_id)];
// The ">= 0" test comes first so the (size_t) cast in the second test is only evaluated
// for non-negative ids.
450+     GGML_ASSERT (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ());
451+ 
452+     const  auto  & cells = v_cells[seq_to_stream[seq_id]];
440453
441454    return  cells.seq_pos_min (seq_id);
442455}
443456
// seq_pos_max: looks up the cell set that seq_to_stream maps seq_id to (an index into v_cells)
// and returns that cell set's seq_pos_max(seq_id).
//
// Reading this diff: the "-" line is the removed code and the "+" lines are its replacement.
// The change drops seq_to_stream.at(seq_id) in favour of an explicit GGML_ASSERT on the range
// of seq_id followed by plain operator[] indexing.
// NOTE(review): presumably seq_to_stream is a std::vector, so .at() used to throw on a bad
// index while operator[] does no checking of its own -- confirm against the declaration.
444457llama_pos llama_kv_cache_unified::seq_pos_max (llama_seq_id seq_id) const  {
445-     const  auto  & cells = v_cells[seq_to_stream.at (seq_id)];
// The ">= 0" test comes first so the (size_t) cast in the second test is only evaluated
// for non-negative ids.
458+     GGML_ASSERT (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ());
459+ 
460+     const  auto  & cells = v_cells[seq_to_stream[seq_id]];
446461
447462    return  cells.seq_pos_max (seq_id);
448463}
@@ -606,8 +621,11 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
606621        const  size_t  n_copy = sc_info.ssrc .size ();
607622
608623        for  (size_t  i = 0 ; i < n_copy; ++i) {
609-             const  auto  ssrc = sc_info.ssrc .at (i);
610-             const  auto  sdst = sc_info.sdst .at (i);
624+             const  auto  ssrc = sc_info.ssrc [i];
625+             const  auto  sdst = sc_info.sdst [i];
626+ 
627+             assert (ssrc < n_stream);
628+             assert (sdst < n_stream);
611629
612630            LLAMA_LOG_DEBUG (" %s: copying KV buffer: stream %d to stream %d\n " 
613631
@@ -616,8 +634,8 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
616634            for  (uint32_t  il = 0 ; il < layers.size (); ++il) {
617635                const  auto  & layer = layers[il];
618636
619-                 ggml_backend_tensor_copy (layer.k_stream . at ( ssrc) , layer.k_stream . at ( sdst) );
620-                 ggml_backend_tensor_copy (layer.v_stream . at ( ssrc) , layer.v_stream . at ( sdst) );
637+                 ggml_backend_tensor_copy (layer.k_stream [ ssrc] , layer.k_stream [ sdst] );
638+                 ggml_backend_tensor_copy (layer.v_stream [ ssrc] , layer.v_stream [ sdst] );
621639            }
622640        }
623641    }
@@ -927,7 +945,7 @@ void llama_kv_cache_unified::apply_ubatch(const slot_info & sinfo, const llama_u
927945
928946            auto  & cells = v_cells[sinfo.strm [s]];
929947
930-             const  auto  idx = sinfo.idxs . at (s). at (ii) ;
948+             const  auto  idx = sinfo.idxs [s][ii] ;
931949
932950            if  (!cells.is_empty (idx)) {
933951                assert (cells.seq_count (idx) == 1 );
@@ -1189,7 +1207,7 @@ void llama_kv_cache_unified::set_input_k_idxs(ggml_tensor * dst, const llama_uba
11891207        const  int64_t  offs = sinfo.strm [s]*get_size ();
11901208
11911209        for  (uint32_t  i = 0 ; i < sinfo.size (); ++i) {
1192-             data[s*sinfo.size () + i] = offs + sinfo.idxs . at (s). at (i) ;
1210+             data[s*sinfo.size () + i] = offs + sinfo.idxs [s][i] ;
11931211        }
11941212    }
11951213}
@@ -1210,7 +1228,7 @@ void llama_kv_cache_unified::set_input_v_idxs(ggml_tensor * dst, const llama_uba
12101228            const  int64_t  offs = sinfo.strm [s]*get_size ();
12111229
12121230            for  (uint32_t  i = 0 ; i < sinfo.size (); ++i) {
1213-                 data[s*sinfo.size () + i] = offs + sinfo.idxs . at (s). at (i) ;
1231+                 data[s*sinfo.size () + i] = offs + sinfo.idxs [s][i] ;
12141232            }
12151233        }
12161234    } else  {
@@ -1224,7 +1242,7 @@ void llama_kv_cache_unified::set_input_v_idxs(ggml_tensor * dst, const llama_uba
12241242
12251243            for  (uint32_t  i = 0 ; i < sinfo.size (); ++i) {
12261244                for  (uint32_t  j = 0 ; j < n_embd_v_gqa; ++j) {
1227-                     data[s*sinfo.size ()*n_embd_v_gqa + i*n_embd_v_gqa + j] = offs + j*kv_size + sinfo.idxs . at (s). at (i) ;
1245+                     data[s*sinfo.size ()*n_embd_v_gqa + i*n_embd_v_gqa + j] = offs + j*kv_size + sinfo.idxs [s][i] ;
12281246                }
12291247            }
12301248        }
@@ -1847,6 +1865,8 @@ void llama_kv_cache_unified::state_write(llama_io_write_i & io, llama_seq_id seq
18471865}
18481866
18491867void  llama_kv_cache_unified::state_read (llama_io_read_i & io, llama_seq_id seq_id) {
1868+     GGML_ASSERT (seq_id == -1  || (seq_id >= 0  && (size_t ) seq_id < seq_to_stream.size ()));
1869+ 
18501870    uint32_t  n_stream_cur;
18511871    io.read_to (&n_stream_cur, sizeof (n_stream_cur));
18521872    if  (n_stream_cur != n_stream) {
@@ -1861,7 +1881,7 @@ void llama_kv_cache_unified::state_read(llama_io_read_i & io, llama_seq_id seq_i
18611881            continue ;
18621882        }
18631883
1864-         const  uint32_t  strm = seq_id == -1  ? s : seq_to_stream. at ( seq_id) ;
1884+         const  uint32_t  strm = seq_id == -1  ? s : seq_to_stream[ seq_id] ;
18651885
18661886        bool  res = true ;
18671887        res = res && state_read_meta (io, strm, cell_count, seq_id);
0 commit comments