Skip to content

Commit 67509db

Browse files
committed
Avoid mlock of offloaded tensors.
1 parent baf2c2d commit 67509db

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

llama.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -691,6 +691,7 @@ struct llama_model_loader {
691691
void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
692692
size_t data_size = 0;
693693
size_t prefetch_size = 0;
694+
size_t lock_size = 0;
694695
for (const llama_load_tensor & lt : tensors_map.tensors) {
695696
data_size += lt.size;
696697
if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
@@ -716,6 +717,11 @@ struct llama_model_loader {
716717
switch(lt.ggml_tensor->backend) {
717718
case GGML_BACKEND_CPU:
718719
lt.ggml_tensor->data = lt.data;
720+
if (use_mmap && lmlock)
721+
{
722+
lock_size += lt.size;
723+
lmlock->grow_to(lock_size);
724+
}
719725
break;
720726
#ifdef GGML_USE_CUBLAS
721727
case GGML_BACKEND_CUDA:
@@ -731,9 +737,6 @@ struct llama_model_loader {
731737
continue;
732738
}
733739
done_size += lt.size;
734-
if (use_mmap && lmlock) {
735-
lmlock->grow_to(done_size);
736-
}
737740
}
738741
}
739742

0 commit comments

Comments (0)