@@ -2469,7 +2469,9 @@ struct llama_model_loader {
         }

         if (progress_callback) {
-            progress_callback(1.0f, progress_callback_user_data);
+            // Even though the model is done loading, we still honor
+            // cancellation since we need to free allocations.
+            return progress_callback(1.0f, progress_callback_user_data);
         }
         return true;
     }
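With this hunk, `load_all_data` reports the final 100% progress through the same boolean contract as every earlier callback invocation, so a caller can still cancel at the very end and the loader unwinds through its normal cleanup path. Below is a minimal sketch of a caller-side callback written against that `bool (float, void *)` contract; the function name and the atomic flag are illustrative, not part of the patch:

```cpp
#include <atomic>

// Illustrative cancellation flag, e.g. set from a UI thread or signal handler.
static std::atomic<bool> g_cancel_load{false};

// Matches the bool(float, void *) shape of llama_progress_callback:
// return true to keep loading, false to request cancellation.
static bool my_progress_callback(float progress, void * user_data) {
    (void) progress;
    (void) user_data;
    // With the hunk above, even the final progress_callback(1.0f, ...) call
    // honors a false return, so late cancellation still frees allocations.
    return !g_cancel_load.load();
}
```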
@@ -3060,8 +3062,6 @@ static bool llm_load_tensors(
         void * progress_callback_user_data) {
     model.t_start_us = ggml_time_us();

-    bool ok = true; // if false, model load was cancelled
-
     auto & ctx     = model.ctx;
     auto & hparams = model.hparams;

@@ -3729,19 +3729,16 @@ static bool llm_load_tensors(
         model.tensors_by_name.emplace_back(ggml_get_name(cur), cur);
     }

-    ok = ok && ml.load_all_data(ctx, progress_callback, progress_callback_user_data, buf_mmap, use_mlock ? &model.mlock_mmap : NULL);
-    if (progress_callback) {
-        // Even though the model is done loading, we still honor
-        // cancellation since we need to free allocations.
-        ok = ok && progress_callback(1.0f, progress_callback_user_data);
+    if (!ml.load_all_data(ctx, progress_callback, progress_callback_user_data, buf_mmap, use_mlock ? &model.mlock_mmap : NULL)) {
+        return false;
     }

     model.mapping = std::move(ml.mapping);

     // loading time will be recalculated after the first eval, so
     // we take page faults deferred by mmap() into consideration
     model.t_load_us = ggml_time_us() - model.t_start_us;
-    return ok;
+    return true;
 }

 // Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
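For context, here is a sketch of how such a callback is wired in through the public API of the same era (`llama_model_default_params`, `llama_load_model_from_file`, `llama_free_model`). Note that the 0/-1/-2 statuses above belong to the internal `llama_model_load`; this sketch assumes the public entry point surfaces both error and cancellation as a null model:

```cpp
#include "llama.h"

// Assumes my_progress_callback from the sketch after the first hunk.
int load_model_cancellable(const char * path) {
    llama_model_params mparams = llama_model_default_params();
    mparams.progress_callback           = my_progress_callback;
    mparams.progress_callback_user_data = NULL;

    llama_model * model = llama_load_model_from_file(path, mparams);
    if (model == NULL) {
        // Assumption: the -1 (error) / -2 (cancelled) distinction from the
        // internal llama_model_load is not exposed here; both yield NULL.
        return 1;
    }

    llama_free_model(model);
    return 0;
}
```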