diff --git a/llama.cpp b/llama.cpp index b8bc0d8215631..ecee564528ff8 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1654,7 +1654,7 @@ static bool llama_eval_internal( // cur = cur*norm(broadcasted) cur = ggml_mul(ctx0, cur, model.norm); - offload_func_nr(cur); + // offload_func_nr(cur); // TODO CPU + GPU mirrored backend ggml_set_name(cur, "result_norm"); embeddings = cur;