From ac3b8869538c7fbdb48ff141d78c4dea091789f0 Mon Sep 17 00:00:00 2001
From: Johannes Gäßler
Date: Fri, 16 Jun 2023 20:25:51 +0200
Subject: [PATCH] llama : fix embd when offloading non-repeating layers (#1891)

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index a90438844..81f047ed2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1658,7 +1658,7 @@ static bool llama_eval_internal(
 
         // cur = cur*norm(broadcasted)
         cur = ggml_mul(ctx0, cur, model.norm);
-        offload_func_nr(cur);
+        // offload_func_nr(cur); // TODO CPU + GPU mirrored backend
         ggml_set_name(cur, "result_norm");
 
         embeddings = cur;