@@ -9001,21 +9001,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
             const size_t elt_size = ggml_element_size(kv_self.k);

             ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-
-            // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
-            struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
-
-            (*gf).n_nodes = 0;
-            (*gf).n_leafs = 0;
-            (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-            (*gf).perf_runs = 0;
-            (*gf).perf_cycles = 0;
-            (*gf).perf_time_us = 0;
-
-            memset((*gf).nodes, 0, sizeof((*gf).nodes));
-            memset((*gf).grads, 0, sizeof((*gf).grads));
-            memset((*gf).leafs, 0, sizeof((*gf).leafs));
-            memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
+            ggml_cgraph gf{};

             ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
             std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
@@ -9033,20 +9019,16 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
                 kv_head, n_embd, n_layer,
                 elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);

-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d));
-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d));
-            ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d));
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
+            ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);

             ggml_free(cpy_ctx);

             // our data is now in the kout3d_data and vout3d_data buffers
             // write them to file
             data_ctx->write(kout3d_data.data(), kout3d_data.size());
             data_ctx->write(vout3d_data.data(), vout3d_data.size());
-
-            // free our allocated graph
-            free(gf);
-            gf = NULL;
         }

         for (uint32_t i = 0; i < kv_size; ++i) {
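
The two hunks above replace the hand-rolled graph setup in `llama_copy_state_data_internal`: instead of `malloc`-ing a `ggml_cgraph`, zeroing each counter and `memset`-ing the fixed-size `nodes`/`grads`/`leafs`/`visited_hash_table` arrays, the graph is now a value-initialized stack object (`ggml_cgraph gf{};`), so the trailing `free(gf)` disappears and call sites pass `&gf`. A minimal standalone sketch (toy struct, not the real `ggml_cgraph`) of why `{}` yields the same zeroed state, assuming the graph type remains an aggregate with no user-declared constructors:

```cpp
// Toy illustration only: value-initializing an aggregate zero-initializes every
// member, including fixed-size arrays, so no malloc + memset pass is needed.
#include <cstdio>

struct toy_graph {
    int    n_nodes;
    int    n_leafs;
    void * nodes[8];   // stand-in for the fixed-size arrays in ggml_cgraph
};

int main() {
    toy_graph gf{};    // all members zero-initialized, lives on the stack
    std::printf("%d %d %p\n", gf.n_nodes, gf.n_leafs, gf.nodes[0]);  // 0 0 and a null pointer
    return 0;          // automatic storage: nothing to free
}
```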
@@ -9147,21 +9129,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
             const size_t elt_size = ggml_element_size(kv_self.k);

             ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-
-            // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
-            struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
-
-            (*gf).n_nodes = 0;
-            (*gf).n_leafs = 0;
-            (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-            (*gf).perf_runs = 0;
-            (*gf).perf_cycles = 0;
-            (*gf).perf_time_us = 0;
-
-            memset((*gf).nodes, 0, sizeof((*gf).nodes));
-            memset((*gf).grads, 0, sizeof((*gf).grads));
-            memset((*gf).leafs, 0, sizeof((*gf).leafs));
-            memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
+            ggml_cgraph gf{};

             ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
             kin3d->data = (void *) inp;
@@ -9179,9 +9147,9 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
                 kv_head, n_embd, n_layer,
                 elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);

-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d));
-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d));
-            ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d));
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
+            ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);

             ggml_free(cpy_ctx);
         }
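
`llama_set_state_data` gets the same treatment with the copy direction reversed: `kin3d`/`vin3d` point at the serialized buffer and the two `ggml_cpy` nodes scatter it back into the strided cache views. The `ggml_view_3d` strides visible in the context lines (`elt_size*n_ctx`, `elt_size*n_ctx*n_embd`) are what confine the copy to the `kv_head` populated cells of the `n_ctx`-wide V cache. A plain C++ sketch of that stride arithmetic with toy sizes (no ggml, float elements, hypothetical names):

```cpp
// Toy illustration only: scatter a tightly packed [kv_head x n_embd x n_layer]
// buffer into an n_ctx-wide cache, mirroring the view strides
// nb1 = elt_size*n_ctx and nb2 = elt_size*n_ctx*n_embd used above.
#include <cstddef>
#include <vector>

int main() {
    const std::size_t n_ctx = 8, n_embd = 4, n_layer = 2, kv_head = 3;  // toy sizes

    std::vector<float> v_cache(n_ctx * n_embd * n_layer, 0.0f);   // full cache, mostly empty
    std::vector<float> v_in  (kv_head * n_embd * n_layer, 1.0f);  // saved state, tightly packed

    for (std::size_t il = 0; il < n_layer; ++il) {
        for (std::size_t ie = 0; ie < n_embd; ++ie) {
            for (std::size_t ic = 0; ic < kv_head; ++ic) {
                // destination offset mirrors the strided view: ic + ie*n_ctx + il*n_ctx*n_embd
                const std::size_t dst = ic + ie*n_ctx   + il*n_ctx*n_embd;
                // source offset is the contiguous layout of the saved buffer
                const std::size_t src = ic + ie*kv_head + il*kv_head*n_embd;
                v_cache[dst] = v_in[src];
            }
        }
    }
    return 0;
}
```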
@@ -9233,11 +9201,10 @@ static bool llama_load_session_file_internal(struct llama_context * ctx, const c
         llama_hparams session_hparams;
         file.read_raw(&session_hparams, sizeof(llama_hparams));

-        // TODO: need to do floating point comparison imprecisely for norm_eps
-        // if (session_hparams != ctx->model.hparams) {
-        //     LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__);
-        //     return false;
-        // }
+        if (session_hparams != ctx->model.hparams) {
+            LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__);
+            return false;
+        }
     }

     // load the prompt
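
Re-enabling the hparams check assumes `llama_hparams::operator!=` now behaves sensibly on a struct read back from disk, including its floating-point `norm_eps` field, which the removed TODO called out as needing an imprecise comparison. One hypothetical shape such a comparison could take (illustrative only, not the actual llama.cpp operator):

```cpp
// Hypothetical sketch: exact comparison for integer hparams, epsilon tolerance for
// floats, so values round-tripped through a session file still compare equal.
#include <cmath>
#include <cstdint>

struct hparams_example {               // stand-in struct, not llama_hparams
    uint32_t n_embd   = 0;
    uint32_t n_layer  = 0;
    float    norm_eps = 1e-5f;

    bool operator!=(const hparams_example & other) const {
        if (n_embd != other.n_embd || n_layer != other.n_layer) {
            return true;
        }
        return std::fabs(norm_eps - other.norm_eps) > 1e-8f;  // tolerate tiny drift
    }
};

int main() {
    hparams_example a, b;
    b.norm_eps += 1e-9f;               // below tolerance: still considered equal
    return a != b ? 1 : 0;             // returns 0
}
```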
@@ -9662,4 +9629,4 @@ static void llama_log_callback_default(ggml_log_level level, const char * text,
     (void) user_data;
     fputs(text, stderr);
     fflush(stderr);
-}
+}