From bf60f27cda6973a45a117aa8ebd3b3e19a3dab03 Mon Sep 17 00:00:00 2001
From: slaren
Date: Sat, 2 Nov 2024 00:54:16 +0100
Subject: [PATCH 1/2] ggml : do not abort when ggml_aligned_malloc fails

---
 ggml/src/ggml.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 84f2c766bd813..47b82a34b217f 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -433,7 +433,6 @@ void * ggml_aligned_malloc(size_t size) {
                 break;
         }
         GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
-        GGML_ABORT("fatal error");
         return NULL;
     }
     return aligned_memory;

From 20e12112fda76906d006f5af42b8099163c515cc Mon Sep 17 00:00:00 2001
From: slaren
Date: Sat, 2 Nov 2024 00:55:19 +0100
Subject: [PATCH 2/2] llama : suggest reduce ctx size when kv init fails

---
 ggml/src/ggml-backend.cpp | 2 +-
 src/llama.cpp             | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index c2afdf3915822..7fef5025e121a 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -798,7 +798,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     void * data = ggml_aligned_malloc(size);

     if (data == NULL) {
-        GGML_LOG_ERROR("%s: failed to allocate buffer of size %zu\n", __func__, size);
+        GGML_LOG_ERROR("%s: failed to allocate buffer of size %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
         return NULL;
     }

diff --git a/src/llama.cpp b/src/llama.cpp
index 0991c40893ceb..1f60370057b45 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -19520,6 +19520,7 @@ struct llama_context * llama_new_context_with_model(

         if (!llama_kv_cache_init(ctx->kv_self, ctx, type_k, type_v, kv_size, cparams.offload_kqv)) {
             LLAMA_LOG_ERROR("%s: llama_kv_cache_init() failed for self-attention cache\n", __func__);
+            LLAMA_LOG_ERROR("%s: suggestion: try using a smaller context size (-c command line option or llama_context_params.n_ctx)\n", __func__);
             llama_free(ctx);
             return nullptr;
         }
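
A minimal caller-side sketch (not part of the patches above): with GGML_ABORT removed, ggml_aligned_malloc signals failure by returning NULL, so any code that calls it is expected to check the result and propagate the error, as ggml_backend_cpu_buffer_type_alloc_buffer does in patch 2. The helper name alloc_tensor_buffer below is hypothetical; only ggml_aligned_malloc itself comes from ggml.

#include <stddef.h>
#include <stdio.h>

// Provided by ggml (ggml/src/ggml.c); returns NULL on failure after patch 1.
void * ggml_aligned_malloc(size_t size);

// Hypothetical caller: checks for NULL instead of relying on an abort,
// leaving recovery (e.g. retrying with a smaller context size) to its caller.
static void * alloc_tensor_buffer(size_t size) {
    void * data = ggml_aligned_malloc(size);
    if (data == NULL) {
        fprintf(stderr, "failed to allocate %.2f MiB\n", size / 1024.0 / 1024.0);
        return NULL;
    }
    return data;
}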