@@ -9001,21 +9001,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
             const size_t elt_size = ggml_element_size(kv_self.k);

             ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-
-            // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
-            struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
-
-            (*gf).n_nodes = 0;
-            (*gf).n_leafs = 0;
-            (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-            (*gf).perf_runs = 0;
-            (*gf).perf_cycles = 0;
-            (*gf).perf_time_us = 0;
-
-            memset((*gf).nodes, 0, sizeof((*gf).nodes));
-            memset((*gf).grads, 0, sizeof((*gf).grads));
-            memset((*gf).leafs, 0, sizeof((*gf).leafs));
-            memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
+            ggml_cgraph gf{};

             ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
             std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
@@ -9033,20 +9019,16 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
                 kv_head, n_embd, n_layer,
                 elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);

-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d));
-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d));
-            ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d));
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
+            ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);

             ggml_free(cpy_ctx);

             // our data is now in the kout3d_data and vout3d_data buffers
             // write them to file
             data_ctx->write(kout3d_data.data(), kout3d_data.size());
             data_ctx->write(vout3d_data.data(), vout3d_data.size());
-
-            // free our allocated graph
-            free(gf);
-            gf = NULL;
         }

         for (uint32_t i = 0; i < kv_size; ++i) {
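Both state-copy hunks above make the same change: the temporary compute graph goes from a `malloc`-ed `ggml_cgraph` that was zeroed field by field (and then `free`-d further down) to a value-initialized local, `ggml_cgraph gf{};`. Value-initializing an aggregate zero-initializes every member, including the fixed-size `nodes`/`leafs`/`visited_hash_table` arrays, so it has the same effect as the removed boilerplate, assuming `GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT` is the zero-valued enumerator (as it appears to be in ggml.h). A minimal standalone sketch with a simplified stand-in struct, not the real `ggml_cgraph`:

```cpp
// Sketch only: `Graph` is a stand-in aggregate, not ggml_cgraph.
// Value-initialization (`Graph g{};`) zeroes every member, so it replaces the
// malloc + field assignments + memset dance, and nothing has to be freed.
#include <cassert>
#include <cstring>

struct Graph {
    int n_nodes;
    int n_leafs;
    int order;                    // zero == "left to right" in this sketch
    int nodes[16];                // fixed-size arrays are zeroed as well
    int visited_hash_table[32];
};

int main() {
    Graph g{};                    // all members are zero

    Graph ref;
    std::memset(&ref, 0, sizeof(ref));
    assert(std::memcmp(&g, &ref, sizeof(Graph)) == 0);

    return 0;                     // automatic storage: no free(), no leak on early return
}
```

Because `gf` now has automatic storage, the `free(gf); gf = NULL;` lines removed in the second hunk are no longer needed, and the graph cannot leak if the function returns early.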
@@ -9147,21 +9129,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
             const size_t elt_size = ggml_element_size(kv_self.k);

             ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-
-            // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
-            struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
-
-            (*gf).n_nodes = 0;
-            (*gf).n_leafs = 0;
-            (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-            (*gf).perf_runs = 0;
-            (*gf).perf_cycles = 0;
-            (*gf).perf_time_us = 0;
-
-            memset((*gf).nodes, 0, sizeof((*gf).nodes));
-            memset((*gf).grads, 0, sizeof((*gf).grads));
-            memset((*gf).leafs, 0, sizeof((*gf).leafs));
-            memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
+            ggml_cgraph gf{};

             ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
             kin3d->data = (void *) inp;
@@ -9179,9 +9147,9 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
                 kv_head, n_embd, n_layer,
                 elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);

-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d));
-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d));
-            ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d));
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
+            ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);

             ggml_free(cpy_ctx);
         }
@@ -9233,11 +9201,10 @@ static bool llama_load_session_file_internal(struct llama_context * ctx, const c
         llama_hparams session_hparams;
         file.read_raw(&session_hparams, sizeof(llama_hparams));

-        // TODO: need to do floating point comparison imprecisely for norm_eps
-        // if (session_hparams != ctx->model.hparams) {
-        //     LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__);
-        //     return false;
-        // }
+        if (session_hparams != ctx->model.hparams) {
+            LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__);
+            return false;
+        }
     }

     // load the prompt
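This hunk re-enables the session-file sanity check that had been commented out behind a TODO about imprecise floating-point comparison of `norm_eps`. For the check to be safe, the hparams inequality operator presumably has to tolerate tiny floating-point differences in fields such as the norm epsilon while comparing the integer fields exactly. A hedged sketch of such a comparison, using a hypothetical `hparams_sketch` struct and an illustrative `EPS` tolerance rather than the real `llama_hparams`:

```cpp
// Sketch only: hypothetical struct and tolerance, not the real llama_hparams.
#include <cmath>
#include <cstdint>

struct hparams_sketch {
    uint32_t n_embd;
    uint32_t n_layer;
    float    f_norm_eps;   // exact == is too strict after a save/load round-trip

    bool operator!=(const hparams_sketch & other) const {
        const float EPS = 1e-9f;   // assumed tolerance, purely illustrative
        if (n_embd  != other.n_embd)  return true;
        if (n_layer != other.n_layer) return true;
        if (std::fabs(f_norm_eps - other.f_norm_eps) > EPS) return true;
        return false;
    }
};

int main() {
    hparams_sketch a{4096, 32, 1e-5f};
    hparams_sketch b{4096, 32, 1.0000001e-5f};   // differs only by float noise
    return (a != b) ? 1 : 0;                     // compares equal -> exit code 0
}
```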
@@ -9662,4 +9629,4 @@ static void llama_log_callback_default(ggml_log_level level, const char * text,
     (void) user_data;
     fputs(text, stderr);
     fflush(stderr);
-}
+}