
Commit 41f308f

llama : do not print "offloading layers" message in CPU-only builds (#5416)
1 parent 6e99f2a, commit 41f308f

File tree

1 file changed (+5, −5)

llama.cpp

Lines changed: 5 additions & 5 deletions
@@ -4209,8 +4209,7 @@ static bool llm_load_tensors(
         ctx_bufs.emplace_back(ctx, buf);
     }
 
-    // print memory requirements
-    {
+    if (llama_supports_gpu_offload()) {
         const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
 
         LLAMA_LOG_INFO("%s: offloading %d repeating layers to GPU\n", __func__, n_gpu);
@@ -4222,10 +4221,11 @@ static bool llm_load_tensors(
         const int max_offloadable_layers = hparams.n_layer + 1;
 
         LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
+    }
 
-        for (ggml_backend_buffer_t buf : model.bufs) {
-            LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
-        }
+    // print memory requirements
+    for (ggml_backend_buffer_t buf : model.bufs) {
+        LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
     }
 
     // populate tensors_by_name
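
The gist of the change: the "offloading ... layers to GPU" summary is now printed only when llama_supports_gpu_offload() reports that a GPU backend was compiled in, so CPU-only builds no longer emit a misleading offload message, while the buffer-size report stays unconditional. Below is a minimal self-contained sketch of that gating pattern; supports_gpu_offload() and report_load() are hypothetical stand-ins for llama.cpp's real llama_supports_gpu_offload() and the logging code in llm_load_tensors, and the exact set of backend macros checked here is an assumption.

// Minimal sketch (not the actual llama.cpp source) of the pattern this
// commit applies: gate GPU-offload log lines behind a compile-time
// capability check so CPU-only builds stay quiet.
#include <algorithm>
#include <cstdio>

// Stand-in for llama_supports_gpu_offload(); in llama.cpp this returns
// true only when a GPU backend (e.g. cuBLAS or Metal) was compiled in.
static bool supports_gpu_offload() {
#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_METAL)
    return true;
#else
    return false;
#endif
}

// Hypothetical reporting helper mirroring the reorganized code: the
// offload summary is conditional, the memory report is not.
static void report_load(int n_gpu_layers, int n_layer, double buf_bytes) {
    if (supports_gpu_offload()) {
        const int n_gpu = std::min(n_gpu_layers, n_layer);
        std::printf("offloading %d repeating layers to GPU\n", n_gpu);
    }
    // memory requirements are printed unconditionally, as in the commit
    std::printf("buffer size = %8.2f MiB\n", buf_bytes / 1024.0 / 1024.0);
}

int main() {
    // e.g. a 32-layer model with 35 GPU layers requested (illustrative numbers)
    report_load(/*n_gpu_layers=*/35, /*n_layer=*/32, /*buf_bytes=*/4096.0 * 1024 * 1024);
    return 0;
}

Built without any GPU macro defined, this prints only the buffer size; compiled with a GPU backend enabled, the offload line appears as well, which is exactly the behavior split the commit introduces.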
