diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 9ae4bc198..35824cd2d 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -529,13 +529,14 @@ static void init_lora(const struct my_llama_model * model, struct my_llama_lora
     set_param_lora(lora);
 
     // measure data size
-    struct ggml_allocr * alloc = NULL;
-    alloc = ggml_allocr_new_measure(tensor_alignment);
-    alloc_lora(alloc, lora);
+    size_t size = 0;
+    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        size += GGML_PAD(ggml_nbytes(t), tensor_alignment);
+    }
 
     // allocate data
-    lora->data.resize(ggml_allocr_max_size(alloc) + tensor_alignment);
-    ggml_allocr_free(alloc);
+    struct ggml_allocr * alloc = NULL;
+    lora->data.resize(size + tensor_alignment);
     alloc = ggml_allocr_new(lora->data.data(), lora->data.size(), tensor_alignment);
     alloc_lora(alloc, lora);
     ggml_allocr_free(alloc);
@@ -1714,11 +1715,9 @@ int main(int argc, char ** argv) {
     struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx_input, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
 
     // measure required memory for input tensors
-    alloc = ggml_allocr_new_measure(tensor_alignment);
-    ggml_allocr_alloc(alloc, tokens_input);
-    ggml_allocr_alloc(alloc, target_probs);
-    size_t max_input_size = ggml_allocr_max_size(alloc) + tensor_alignment;
-    ggml_allocr_free(alloc);
+    size_t max_input_size = GGML_PAD(ggml_nbytes(tokens_input), tensor_alignment) +
+                            GGML_PAD(ggml_nbytes(target_probs), tensor_alignment) +
+                            tensor_alignment;
     printf("%s: input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f));
 
     // allocate input tensors
diff --git a/ggml.c b/ggml.c
index c00ab00da..630deb49d 100644
--- a/ggml.c
+++ b/ggml.c
@@ -5494,6 +5494,39 @@ struct ggml_tensor * ggml_view_tensor(
     return result;
 }
 
+struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx) {
+    struct ggml_object * obj = ctx->objects_begin;
+
+    char * const mem_buffer = ctx->mem_buffer;
+
+    while (obj != NULL) {
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            return (struct ggml_tensor *)(mem_buffer + obj->offs);
+        }
+
+        obj = obj->next;
+    }
+
+    return NULL;
+}
+
+struct ggml_tensor * ggml_get_next_tensor(struct ggml_context * ctx, struct ggml_tensor * tensor) {
+    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
+    obj = obj->next;
+
+    char * const mem_buffer = ctx->mem_buffer;
+
+    while (obj != NULL) {
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            return (struct ggml_tensor *)(mem_buffer + obj->offs);
+        }
+
+        obj = obj->next;
+    }
+
+    return NULL;
+}
+
 struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
     struct ggml_object * obj = ctx->objects_begin;
 
@@ -8647,6 +8680,7 @@ void ggml_set_param(
 
     GGML_ASSERT(tensor->grad == NULL);
     tensor->grad = ggml_dup_tensor(ctx, tensor);
+    ggml_format_name(tensor->grad, "%s (grad)", tensor->name);
 }
 
 // ggml_compute_forward_dup
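The finetune.cpp hunks replace the two-pass measure allocator with a direct size computation over the context's tensors, using the enumeration helpers added in ggml.c above. A minimal sketch of the same pattern as a reusable helper (the function name `context_tensor_size` is illustrative, not part of this patch):

```c
#include "ggml.h"

// Sum the padded sizes of all tensors in a context. Each tensor's byte
// size is rounded up to the allocator alignment before accumulating,
// mirroring the size computation in init_lora() above.
static size_t context_tensor_size(struct ggml_context * ctx, size_t alignment) {
    size_t size = 0;
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        size += GGML_PAD(ggml_nbytes(t), alignment);
    }
    return size;
}
```

The enumeration walks the context's object list in allocation order and skips non-tensor objects (the `obj->type == GGML_OBJECT_TENSOR` check), so the sum covers exactly the tensors created in the context.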
diff --git a/ggml.h b/ggml.h
index 3eddc44b9..6e35888e9 100644
--- a/ggml.h
+++ b/ggml.h
@@ -704,6 +704,9 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
     GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
 
+    // Context tensor enumeration and lookup
+    GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
+    GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
     GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
 
     GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
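Together with the "%s (grad)" names that ggml_set_param() now assigns (previously the duplicated gradient tensor carried no name), the enumeration API makes it easy to inspect everything allocated in a context. A minimal sketch using only the public API declared above (the helper name `dump_context_tensors` is hypothetical):

```c
#include <stdio.h>
#include "ggml.h"

// Print the name and byte size of every tensor in a context; after this
// patch, gradients created by ggml_set_param() appear with a " (grad)"
// suffix instead of an empty name.
static void dump_context_tensors(struct ggml_context * ctx) {
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        printf("%-48s %10zu bytes\n", ggml_get_name(t), ggml_nbytes(t));
    }
}
```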