@@ -2270,18 +2270,15 @@ struct llama_model_loader {
         }
     }
 
-
-
-    void load_all_data(struct ggml_context * ctx, llama_progress_callback progress_callback, void * progress_callback_user_data, ggml_backend_buffer_t buf_mmap, llama_mlock * lmlock) {
-        size_t size_lock = 0;
+    void load_all_data(struct ggml_context * ctx, llama_progress_callback progress_callback, void * progress_callback_user_data, ggml_backend_buffer_t buf_mmap, llama_mlock * lmlock) const {
         size_t size_data = 0;
 
         for (int i = 0; i < gguf_get_n_tensors(ctx_gguf); i++) {
             struct ggml_tensor * cur = ggml_get_tensor(ctx, gguf_get_tensor_name(ctx_gguf, i));
             size_data += ggml_nbytes(cur);
         }
 
-        if (use_mmap) {
+        if (use_mmap && buf_mmap) {
             if (lmlock) {
                 lmlock->init(mapping->addr);
             }
@@ -2305,6 +2302,9 @@ struct llama_model_loader {
                 if (use_mmap) {
                     if (buf_mmap) {
                         ggml_backend_tensor_alloc(buf_mmap, cur, (uint8_t *) mapping->addr + offs);
+                        if (lmlock) {
+                            lmlock->grow_to(offs + ggml_nbytes(cur));
+                        }
                     } else {
                         ggml_backend_tensor_set(cur, (uint8_t *) mapping->addr + offs, 0, ggml_nbytes(cur));
                     }
@@ -2319,11 +2319,6 @@ struct llama_model_loader {
                         ggml_backend_tensor_set(cur, read_buf.data(), 0, ggml_nbytes(cur));
                     }
                 }
-
-                if (use_mmap && lmlock) {
-                    size_lock += ggml_nbytes(cur);
-                    lmlock->grow_to(size_lock);
-                }
             } else {
                 // HACK: mark tensor as allocated
                 cur->data = (void *)(uintptr_t)1;
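For context on the mlock change: the removed code grew the locked region by a running `size_lock` total of every tensor's size, while the new code grows it to `offs + ggml_nbytes(cur)` only for tensors actually allocated from `buf_mmap`, so the locked range tracks what is really placed in the mapping. A minimal standalone sketch of that pattern is below; `MockMlock` and `MockTensor` are invented stand-ins for illustration, not the real llama.cpp types.

```cpp
// Sketch of the "grow the lock to the end offset of each mapped tensor" pattern.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

struct MockMlock {
    size_t locked = 0;            // bytes locked from the start of the mapping
    void grow_to(size_t target) { // analogous in spirit to llama_mlock::grow_to
        locked = std::max(locked, target);
    }
};

struct MockTensor {
    size_t offs;   // byte offset of the tensor's data inside the mapping
    size_t nbytes; // size of the tensor's data
    bool   mapped; // whether the tensor is actually placed in the mmap buffer
};

int main() {
    // Tensors may be skipped (not mapped) or visited out of file order.
    std::vector<MockTensor> tensors = {{0, 64, true}, {64, 64, false}, {128, 32, true}};

    MockMlock lmlock;
    for (const MockTensor & t : tensors) {
        if (t.mapped) {
            // Lock exactly up to the end of the tensor just placed in the mapping,
            // mirroring lmlock->grow_to(offs + ggml_nbytes(cur)) in the diff above.
            lmlock.grow_to(t.offs + t.nbytes);
        }
    }
    std::printf("locked %zu bytes\n", lmlock.locked); // 160: covers the last mapped tensor
    return 0;
}
```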