Commit dd8f089

kompute : fix fallback to CPU (ggml-org#5201)

Authored by cebtenzzre; committed by hodlen. 1 parent: a03c999.

File tree

1 file changed (+2, -2 lines)


llama.cpp

Lines changed: 2 additions & 2 deletions
@@ -4136,7 +4136,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
     }
 
 #ifdef GGML_USE_KOMPUTE
-    if (ggml_vk_has_device() && params.n_gpu_layers > 0 && (
+    if (params.n_gpu_layers > 0 && (
         !(model.arch == LLM_ARCH_LLAMA || model.arch == LLM_ARCH_FALCON)
         || !(
             model.ftype == LLAMA_FTYPE_ALL_F32 ||
@@ -4145,8 +4145,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
             model.ftype == LLAMA_FTYPE_MOSTLY_Q4_1
         )
     )) {
-        // disable Vulkan due to unsupported model architecture or quantization type
         // TODO(cebtenzzre): propagate this error outside of llama_load_model_from_file
+        LLAMA_LOG_WARN("%s: disabling Kompute due to unsupported model arch or quantization\n", __func__);
         params.n_gpu_layers = 0;
     }
 #endif
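
What the change does, as read from the diff: the compatibility check is no longer gated on ggml_vk_has_device(), so any request for GPU offload (params.n_gpu_layers > 0) of an unsupported architecture or quantization now triggers the CPU fallback, and the fallback is now reported via LLAMA_LOG_WARN rather than happening silently. Below is a minimal, self-contained C++ sketch of this load-time fallback pattern; the identifiers (Arch, Ftype, backend_supports, maybe_fall_back_to_cpu) are hypothetical stand-ins for illustration, not the llama.cpp API.

// Sketch of the load-time CPU-fallback pattern in the diff above.
// All identifiers here are hypothetical; only the control flow
// mirrors the patched llama.cpp check.
#include <cstdio>

enum class Arch  { LLAMA, FALCON, OTHER };
enum class Ftype { ALL_F32, MOSTLY_F16, MOSTLY_Q4_0, MOSTLY_Q4_1, OTHER };

struct Params {
    int n_gpu_layers; // > 0 requests GPU offload
};

// Same arch/quantization whitelist as the patched condition.
static bool backend_supports(Arch arch, Ftype ftype) {
    const bool arch_ok  = arch == Arch::LLAMA || arch == Arch::FALCON;
    const bool ftype_ok = ftype == Ftype::ALL_F32     || ftype == Ftype::MOSTLY_F16 ||
                          ftype == Ftype::MOSTLY_Q4_0 || ftype == Ftype::MOSTLY_Q4_1;
    return arch_ok && ftype_ok;
}

// The essence of the fix: decide the fallback from the offload request
// alone, not from whether a device happens to be initialized already.
static void maybe_fall_back_to_cpu(Params & params, Arch arch, Ftype ftype) {
    if (params.n_gpu_layers > 0 && !backend_supports(arch, ftype)) {
        std::fprintf(stderr, "%s: disabling GPU offload: unsupported model arch or quantization\n", __func__);
        params.n_gpu_layers = 0; // load fully on CPU instead of failing
    }
}

int main() {
    Params params{32}; // ask for 32 GPU layers
    maybe_fall_back_to_cpu(params, Arch::OTHER, Ftype::MOSTLY_F16);
    return params.n_gpu_layers == 0 ? 0 : 1; // fallback expected: exit 0
}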
