From bc34dd4f5b5a7c10ae3ed85a265ce6f2ed2fab79 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 29 Sep 2023 19:05:18 +0300 Subject: [PATCH] train : fix KQ_pos allocation (#3392) * train : fix KQ_pos allocation * make sure KQ_pos is not reallocated in finetune --------- Co-authored-by: xaedes --- examples/finetune/finetune.cpp | 5 ++++- examples/train-text-from-scratch/train-text-from-scratch.cpp | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index b61165fb7..8ca1874da 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -626,7 +626,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs( // KQ_pos - contains the positions struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N); - { + ggml_allocr_alloc(alloc, KQ_pos); + if (!ggml_allocr_is_measure(alloc)) { int * data = (int *) KQ_pos->data; for (int i = 0; i < N; ++i) { data[i] = n_past + i; @@ -786,6 +787,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs( ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, one)); GGML_ASSERT(t36->grad->data == NULL && t36->grad->view_src == NULL); ggml_allocr_alloc(alloc, t36->grad); + // KQ_pos + ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, KQ_pos, one)); // make sure base model tensors data cannot be used in viewable operations ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->tok_embeddings, one)); diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index 5043f32d0..be693b3ac 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -334,7 +334,8 @@ static struct ggml_tensor * llama_build_train_graphs( // KQ_pos - contains the positions struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N); - { + ggml_allocr_alloc(alloc, KQ_pos); + if (!ggml_allocr_is_measure(alloc)) { int * data = (int *) KQ_pos->data; for (int i = 0; i < N; ++i) { data[i] = n_past + i;