llama : replace (permute + reshape + view_1d) with (view_3d) (#2538)

ggml-ci
This commit is contained in:
Georgi Gerganov 2023-08-17 10:47:09 +03:00 committed by GitHub
parent 7cf54e1f74
commit a73ccf1aa3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -1609,11 +1609,11 @@ static struct ggml_cgraph * llama_build_graph(
 ggml_set_name(Q, "Q");
 struct ggml_tensor * K =
-ggml_permute(ctx0,
-ggml_reshape_3d(ctx0,
-ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd_gqa, il*n_ctx*ggml_element_size(kv_self.k)*n_embd_gqa),
-n_embd_head, n_head_kv, n_past + N),
-0, 2, 1, 3);
+ggml_view_3d(ctx0, kv_self.k,
+n_embd_head, n_past + N, n_head_kv,
+ggml_element_size(kv_self.k)*n_embd_gqa,
+ggml_element_size(kv_self.k)*n_embd_head,
+ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il);
 offload_func_kq(K);
 ggml_set_name(K, "K");
@@ -1642,9 +1642,9 @@ static struct ggml_cgraph * llama_build_graph(
 struct ggml_tensor * V =
 ggml_view_3d(ctx0, kv_self.v,
 n_past + N, n_embd_head, n_head_kv,
-n_ctx*ggml_element_size(kv_self.v),
-n_ctx*ggml_element_size(kv_self.v)*n_embd_head,
-n_ctx*ggml_element_size(kv_self.v)*n_embd_gqa*il);
+ggml_element_size(kv_self.v)*n_ctx,
+ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
+ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
 offload_func_v(V);
 ggml_set_name(V, "V");