Commit 4e6db1f

reverted memory fixes, see ggml-org#3527

1 parent fe0606c commit 4e6db1f
3 files changed (+43 −76 lines): ggml-alloc.c, ggml.c, llama.cpp

ggml-alloc.c

Lines changed: 3 additions & 2 deletions
@@ -317,8 +317,9 @@ struct ggml_allocr * ggml_allocr_new_from_buffer(struct ggml_backend_buffer * bu
         /*.parse_seq     = */ {0},
         /*.parse_seq_len = */ 0,
 #ifdef GGML_ALLOCATOR_DEBUG
-    (*alloc).allocated_tensors = {0};
+        /*.allocated_tensors = */ {0},
 #endif
+    };

     ggml_allocr_reset(alloc);

@@ -590,4 +591,4 @@ size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph *

 size_t ggml_allocr_max_size(struct ggml_allocr * alloc) {
     return alloc->max_size;
-}
+}
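
Note: the revert moves the debug-only allocated_tensors field back inside the compound-literal initializer and restores the closing };, so the #ifdef now selects between two complete initializers instead of patching the struct after the fact. A minimal sketch of the idiom, using a made-up struct rather than ggml's ggml_allocr:

    #include <stdio.h>

    #define DEBUG_COUNTERS 1   // flip to 0: the initializer still compiles

    struct tracker {
        int capacity;
    #if DEBUG_COUNTERS
        int hits[4];
    #endif
    };

    int main(void) {
        struct tracker t = {
            /*.capacity =*/ 16,
    #if DEBUG_COUNTERS
            /*.hits     =*/ {0},
    #endif
        };
        printf("capacity = %d\n", t.capacity);
        return 0;
    }

Either way the preprocessor is resolved before the initializer is parsed, so both build configurations compile a single, full initialization.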

ggml.c

Lines changed: 27 additions & 28 deletions
@@ -4698,21 +4698,19 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
     }

     const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
-
-    ctx = (struct ggml_context *)malloc(sizeof(struct ggml_context));
-
-    struct ggml_scratch empty_scratch = { 0, 0, NULL };
-
-    (*ctx).mem_size = mem_size;
-    (*ctx).mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size);
-    (*ctx).mem_buffer_owned = params.mem_buffer ? false : true;
-    (*ctx).no_alloc = params.no_alloc;
-    (*ctx).no_alloc_save = params.no_alloc;
-    (*ctx).n_objects = 0;
-    (*ctx).objects_begin = NULL;
-    (*ctx).objects_end = NULL;
-    (*ctx).scratch = empty_scratch;
-    (*ctx).scratch_save = empty_scratch;
+
+    *ctx = (struct ggml_context) {
+        /*.mem_size         =*/ mem_size,
+        /*.mem_buffer       =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
+        /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
+        /*.no_alloc         =*/ params.no_alloc,
+        /*.no_alloc_save    =*/ params.no_alloc,
+        /*.n_objects        =*/ 0,
+        /*.objects_begin    =*/ NULL,
+        /*.objects_end      =*/ NULL,
+        /*.scratch          =*/ { 0, 0, NULL, },
+        /*.scratch_save     =*/ { 0, 0, NULL, },
+    };

     GGML_ASSERT(ctx->mem_buffer != NULL);
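
Note: ggml_init goes back to ggml's one-shot initialization style, a C99 compound literal assigned through the context pointer, with /*.name =*/ comments documenting the positional order of the fields. A self-contained sketch of the pattern, with a hypothetical struct rather than the real ggml_context:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct context {
        size_t mem_size;
        void * mem_buffer;
        bool   owned;
    };

    static void context_init(struct context * ctx, size_t n) {
        // one assignment replaces a chain of (*ctx).field = ... statements;
        // the /*.name =*/ comments only document the positional order
        *ctx = (struct context) {
            /*.mem_size   =*/ n,
            /*.mem_buffer =*/ malloc(n),
            /*.owned      =*/ true,
        };
    }

    int main(void) {
        struct context ctx;
        context_init(&ctx, 64);
        printf("%zu bytes, owned = %d\n", ctx.mem_size, (int) ctx.owned);
        free(ctx.mem_buffer);
        return 0;
    }

The comments are not real designators, so the initializers must stay in declaration order; in exchange, any field left out of the list is zero-initialized rather than indeterminate.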

@@ -18054,18 +18052,19 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
 struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
     struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
-
-    (*cgraph).n_nodes = 0;
-    (*cgraph).n_leafs = 0;
-    (*cgraph).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-    (*cgraph).perf_runs = 0;
-    (*cgraph).perf_cycles = 0;
-    (*cgraph).perf_time_us = 0;
-
-    memset((*cgraph).nodes, 0, sizeof((*cgraph).nodes));
-    memset((*cgraph).grads, 0, sizeof((*cgraph).grads));
-    memset((*cgraph).leafs, 0, sizeof((*cgraph).leafs));
-    memset((*cgraph).visited_hash_table, 0, sizeof((*cgraph).visited_hash_table));
+
+    *cgraph = (struct ggml_cgraph) {
+        /*.n_nodes      =*/ 0,
+        /*.n_leafs      =*/ 0,
+        /*.nodes        =*/ { NULL },
+        /*.grads        =*/ { NULL },
+        /*.leafs        =*/ { NULL },
+        /*.hash_table   =*/ { NULL },
+        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
+        /*.perf_runs    =*/ 0,
+        /*.perf_cycles  =*/ 0,
+        /*.perf_time_us =*/ 0,
+    };

     return cgraph;
 }
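
Note: the four memset calls become redundant once the arrays appear in the initializer, because an aggregate initializer such as { NULL } zero-initializes every element after the first named one. A sketch of that guarantee with a toy graph struct (not the real ggml_cgraph):

    #include <assert.h>
    #include <stddef.h>

    struct graph {
        int    n_nodes;
        void * nodes[8];
    };

    int main(void) {
        // { NULL } names only nodes[0]; C zero-initializes the remaining
        // seven elements, which is exactly what the removed memsets did
        struct graph g = {
            /*.n_nodes =*/ 0,
            /*.nodes   =*/ { NULL },
        };
        for (size_t i = 0; i < 8; ++i) {
            assert(g.nodes[i] == NULL);
        }
        return 0;
    }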
@@ -22005,4 +22004,4 @@ int ggml_cpu_has_vsx(void) {
 #endif
 }

-////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////

llama.cpp

Lines changed: 13 additions & 46 deletions
@@ -9001,21 +9001,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
         const size_t elt_size = ggml_element_size(kv_self.k);

         ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-
-        // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
-        struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
-
-        (*gf).n_nodes = 0;
-        (*gf).n_leafs = 0;
-        (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-        (*gf).perf_runs = 0;
-        (*gf).perf_cycles = 0;
-        (*gf).perf_time_us = 0;
-
-        memset((*gf).nodes, 0, sizeof((*gf).nodes));
-        memset((*gf).grads, 0, sizeof((*gf).grads));
-        memset((*gf).leafs, 0, sizeof((*gf).leafs));
-        memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
+        ggml_cgraph gf{};

         ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
         std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
@@ -9033,20 +9019,16 @@
             kv_head, n_embd, n_layer,
             elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);

-        ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d));
-        ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d));
-        ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
+        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d));
+        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
+        ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);

         ggml_free(cpy_ctx);

         // our data is now in the kout3d_data and vout3d_data buffers
         // write them to file
         data_ctx->write(kout3d_data.data(), kout3d_data.size());
         data_ctx->write(vout3d_data.data(), vout3d_data.size());
-
-        // free our allocated graph
-        free(gf);
-        gf = NULL;
     }

     for (uint32_t i = 0; i < kv_size; ++i) {
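
Note: both serialization paths swap the malloc'd temporary graph for a zero-initialized stack object, ggml_cgraph gf{} (C++ value-initialization), passed by address to ggml_build_forward_expand and ggml_graph_compute_helper; the matching free(gf) goes away with it. The same lifetime change sketched in C with a toy struct (the real ggml_cgraph embeds its node arrays, so this also commits that storage to the stack):

    #include <assert.h>
    #include <stddef.h>

    struct toy_cgraph {
        int    n_nodes;
        void * nodes[8];
    };

    static void build_and_run(struct toy_cgraph * gf) {
        // callee sees a fully zeroed graph, same as the C++ `gf{}` spelling
        assert(gf->n_nodes == 0);
        assert(gf->nodes[0] == NULL);
    }

    int main(void) {
        // automatic storage, zero-initialized: nothing to free on any
        // return path, unlike the malloc'd graph this commit removes
        struct toy_cgraph gf = {0};
        build_and_run(&gf);
        return 0;
    }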
@@ -9147,21 +9129,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         const size_t elt_size = ggml_element_size(kv_self.k);

         ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-
-        // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
-        struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
-
-        (*gf).n_nodes = 0;
-        (*gf).n_leafs = 0;
-        (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-        (*gf).perf_runs = 0;
-        (*gf).perf_cycles = 0;
-        (*gf).perf_time_us = 0;
-
-        memset((*gf).nodes, 0, sizeof((*gf).nodes));
-        memset((*gf).grads, 0, sizeof((*gf).grads));
-        memset((*gf).leafs, 0, sizeof((*gf).leafs));
-        memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
+        ggml_cgraph gf{};

         ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
         kin3d->data = (void *) inp;
@@ -9179,9 +9147,9 @@
             kv_head, n_embd, n_layer,
             elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);

-        ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d));
-        ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d));
-        ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
+        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d));
+        ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
+        ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);

         ggml_free(cpy_ctx);
     }
@@ -9233,11 +9201,10 @@ static bool llama_load_session_file_internal(struct llama_context * ctx, const c
         llama_hparams session_hparams;
         file.read_raw(&session_hparams, sizeof(llama_hparams));

-        // TODO: need to do floating point comparison imprecisely for norm_eps
-        //if (session_hparams != ctx->model.hparams) {
-        //    LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__);
-        //    return false;
-        //}
+        if (session_hparams != ctx->model.hparams) {
+            LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__);
+            return false;
+        }
     }

     // load the prompt
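
Note: this re-enables the exact hparams comparison; the deleted TODO had flagged that norm_eps, a float read back from the session file, may eventually need an imprecise comparison. One way such a tolerance check could look, purely as a sketch (the helper name and epsilon are hypothetical, not llama.cpp API):

    #include <math.h>
    #include <stdbool.h>
    #include <stdio.h>

    // hypothetical helper: treat two norm_eps values as equal within a
    // relative tolerance instead of bit-for-bit
    static bool norm_eps_equal(float a, float b) {
        const float rel_tol = 1e-6f;
        return fabsf(a - b) <= rel_tol * fmaxf(fabsf(a), fabsf(b));
    }

    int main(void) {
        printf("%d\n", norm_eps_equal(1e-5f, 1.0000001e-5f)); // 1: close enough
        printf("%d\n", norm_eps_equal(1e-5f, 1e-6f));         // 0: different
        return 0;
    }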
@@ -9662,4 +9629,4 @@ static void llama_log_callback_default(ggml_log_level level, const char * text,
     (void) user_data;
     fputs(text, stderr);
     fflush(stderr);
-}
+}
