From cafcd4f89500b8afef722cdb08088eceb8a22572 Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 14 Dec 2023 16:52:08 +0100
Subject: [PATCH] ggml : remove n_dims from ggml_tensor (#4469)

ggml-ci
---
 common/train.cpp                   | 18 ++--
 examples/baby-llama/baby-llama.cpp | 18 ++--
 .../convert-llama2c-to-ggml.cpp    |  4 +-
 examples/finetune/finetune.cpp     |  2 +-
 examples/gguf/gguf.cpp             |  2 +-
 examples/llava/clip.cpp            |  6 +-
 ggml.c                             | 94 ++++++++++---------
 ggml.h                             |  8 +-
 llama.cpp                          |  2 +-
 9 files changed, 81 insertions(+), 73 deletions(-)

diff --git a/common/train.cpp b/common/train.cpp
index 773e2c59c..dcf9614e4 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -71,7 +71,7 @@ void free_random_uniform_distribution(struct random_uniform_distribution * rnd)
 
 struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct random_normal_distribution * rnd) {
     float scale = 1.0f; // xavier
-    switch (tensor->n_dims) {
+    switch (ggml_n_dims(tensor)) {
         case 1:
             scale /= sqrtf((float) tensor->ne[0]);
             for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
@@ -119,7 +119,7 @@ struct ggml_tensor * randomize_tensor_normal(struct ggml_tensor * tensor, struct
 }
 
 struct ggml_tensor * randomize_tensor_uniform(struct ggml_tensor * tensor, struct random_uniform_distribution * rnd) {
-    switch (tensor->n_dims) {
+    switch (ggml_n_dims(tensor)) {
         case 1:
             for (int i0 = 0; i0 < tensor->ne[0]; i0++) {
                 float * dst = (float *) ((char *) tensor->data + i0*tensor->nb[0]);
@@ -183,25 +183,27 @@ float fclamp(const float v, const float min, const float max) {
 }
 
 void assert_shape_1d(struct ggml_tensor * tensor, int64_t ne0) {
-    GGML_ASSERT(tensor->n_dims == 1);
     GGML_ASSERT(tensor->ne[0] == ne0);
+    GGML_ASSERT(tensor->ne[1] == 1);
+    GGML_ASSERT(tensor->ne[2] == 1);
+    GGML_ASSERT(tensor->ne[3] == 1);
 }
 
 void assert_shape_2d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1) {
-    GGML_ASSERT(tensor->n_dims == 2);
     GGML_ASSERT(tensor->ne[0] == ne0);
     GGML_ASSERT(tensor->ne[1] == ne1);
+    GGML_ASSERT(tensor->ne[2] == 1);
+    GGML_ASSERT(tensor->ne[3] == 1);
 }
 
 void assert_shape_3d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2) {
-    GGML_ASSERT(tensor->n_dims == 3);
     GGML_ASSERT(tensor->ne[0] == ne0);
     GGML_ASSERT(tensor->ne[1] == ne1);
     GGML_ASSERT(tensor->ne[2] == ne2);
+    GGML_ASSERT(tensor->ne[3] == 1);
 }
 
 void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
-    GGML_ASSERT(tensor->n_dims == 4);
     GGML_ASSERT(tensor->ne[0] == ne0);
     GGML_ASSERT(tensor->ne[1] == ne1);
     GGML_ASSERT(tensor->ne[2] == ne2);
@@ -225,8 +227,8 @@ int64_t get_example_targets_batch(
     bool   sample_random_offsets
 ) {
     GGML_ASSERT(samples_count > 0);
-    GGML_ASSERT(tokens_input->n_dims == 2);
-    GGML_ASSERT(target_probs->n_dims == 3);
+    GGML_ASSERT(ggml_is_matrix(tokens_input));
+    GGML_ASSERT(ggml_is_3d(target_probs));
     int64_t n_vocab  = target_probs->ne[0];
     int64_t n_tokens = tokens_input->ne[0];
     int64_t n_batch  = tokens_input->ne[1];
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 8155101d0..2dc2988d3 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1258,9 +1258,9 @@ static struct ggml_tensor * forward_lora(
 }
 
 static void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
-    assert(logits->n_dims == 2);
-    assert(probs->n_dims == 2);
-    assert(best_samples->n_dims == 1);
+    assert(ggml_is_matrix(logits));
+    assert(ggml_is_matrix(probs));
+    assert(ggml_is_vector(best_samples));
     assert(logits->ne[1] == best_samples->ne[0]);
     assert(logits->ne[0] == probs->ne[0]);
     assert(logits->ne[1] == probs->ne[1]);
@@ -1292,9 +1292,9 @@ static void sample_softmax_batch(
     struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs,
     struct ggml_tensor * best_samples
 ) {
-    GGML_ASSERT(best_samples->n_dims == 2);
-    GGML_ASSERT(logits->n_dims == 3);
-    GGML_ASSERT(probs->n_dims == 3);
+    GGML_ASSERT(ggml_is_matrix(best_samples));
+    GGML_ASSERT(ggml_is_3d(logits));
+    GGML_ASSERT(ggml_is_3d(probs));
     int n_tokens = best_samples->ne[0];
     int n_batch  = best_samples->ne[1];
     int n_vocab  = logits->ne[0];
@@ -1334,7 +1334,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
 }
 
 static void print_matrix(struct ggml_tensor * probs) {
-    assert(probs->n_dims == 2);
+    assert(ggml_is_matrix(probs));
     for (int i = 0; i < probs->ne[1]; ++i) {
         for (int k = 0; k < probs->ne[0]; ++k) {
             float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
@@ -1386,8 +1386,8 @@ static void get_example_targets(int example_id, struct ggml_tensor * tokens_inpu
 static void get_example_targets_batch(
     struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets
 ) {
-    GGML_ASSERT(tokens_input->n_dims == 2);
-    GGML_ASSERT( targets->n_dims == 3);
+    GGML_ASSERT(ggml_is_matrix(tokens_input));
+    GGML_ASSERT(ggml_is_3d(targets));
     int n_tokens = tokens_input->ne[0];
     int n_batch  = tokens_input->ne[1];
     GGML_ASSERT(n_tokens == targets->ne[1]);
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index cae3bf3c3..4d41e1779 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -427,7 +427,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
 }
 
 static void print_matrix(struct ggml_tensor * probs) {
-    assert(probs->n_dims == 2);
+    assert(ggml_is_matrix(probs));
     for (int i = 0; i < probs->ne[1]; ++i) {
         for (int k = 0; k < probs->ne[0]; ++k) {
             float p = get_f32_2d(probs, k, i);
@@ -639,7 +639,7 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
 
 static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
     int ct;
-    switch (gg_weights->n_dims){
+    switch (ggml_n_dims(gg_weights)) {
         case 1:
             ct = 0;
             for (int i0 = 0; i0 < gg_weights->ne[0]; i0++){
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index af46e44a6..b9849e8c9 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -1110,7 +1110,7 @@ static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor,
         name = ggml_get_name(tensor);
     }
     uint32_t name_len = strlen(name);
-    uint32_t nd = tensor->n_dims;
+    uint32_t nd = ggml_n_dims(tensor);
     uint32_t ne[4] = { (uint32_t)tensor->ne[0],
                        (uint32_t)tensor->ne[1],
                        (uint32_t)tensor->ne[2],
diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
index 9ab63a293..9e24bf24c 100644
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -195,7 +195,7 @@ static bool gguf_ex_read_1(const std::string & fname) {
 
         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
 
-        printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data);
+        printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
 
         // print first 10 elements
         const float * data = (const float *) cur->data;
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 4bb7b93b6..112465968 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -514,7 +514,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
             ctx_size += padded_size;
             if (verbosity >= 3) {
                 printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, padded_size=%zu, offset=%zu\n", __func__, i,
-                       cur->n_dims, cur->name, tensor_size, padded_size, offset);
+                       ggml_n_dims(cur), cur->name, tensor_size, padded_size, offset);
             }
         }
     }
@@ -962,7 +962,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
         }
 
         // quantize only 2D tensors
-        quantize &= (cur->n_dims == 2);
+        quantize &= (ggml_n_dims(cur) == 2);
 
         if (quantize) {
             new_type = type;
@@ -1035,7 +1035,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
             fout.put(0);
         }
 
-        printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), cur->n_dims, quantize,
+        printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), ggml_n_dims(cur), quantize,
                orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
     }
 
diff --git a/ggml.c b/ggml.c
index f0a972690..f6f8b8251 100644
--- a/ggml.c
+++ b/ggml.c
@@ -2054,24 +2054,37 @@ size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return ggml_type_size(tensor->type);
 }
 
-static inline bool ggml_is_scalar(const struct ggml_tensor * tensor) {
+bool ggml_is_scalar(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
 }
 
-static inline bool ggml_is_vector(const struct ggml_tensor * tensor) {
+bool ggml_is_vector(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
 }
 
-static inline bool ggml_is_matrix(const struct ggml_tensor * tensor) {
+bool ggml_is_matrix(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return tensor->ne[2] == 1 && tensor->ne[3] == 1;
 }
 
+bool ggml_is_3d(const struct ggml_tensor * tensor) {
+    return tensor->ne[3] == 1;
+}
+
+int ggml_n_dims(const struct ggml_tensor * tensor) {
+    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
+        if (tensor->ne[i] > 1) {
+            return i + 1;
+        }
+    }
+    return 1;
+}
+
 static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
@@ -2521,7 +2534,6 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.type         =*/ type,
         /*.backend      =*/ GGML_BACKEND_CPU,
         /*.buffer       =*/ NULL,
-        /*.n_dims       =*/ n_dims,
         /*.ne           =*/ { 1, 1, 1, 1 },
         /*.nb           =*/ { 0, 0, 0, 0 },
         /*.op           =*/ GGML_OP_NONE,
@@ -2628,7 +2640,7 @@ struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
 }
 
 struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
-    return ggml_new_tensor(ctx, src->type, src->n_dims, src->ne);
+    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
 }
 
 static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
@@ -3077,7 +3089,7 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char *
 struct ggml_tensor * ggml_view_tensor(
         struct ggml_context * ctx,
         struct ggml_tensor  * src) {
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src, 0);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
     ggml_format_name(result, "%s (view)", src->name);
 
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
@@ -3235,10 +3247,10 @@ static struct ggml_tensor * ggml_add_cast_impl(
         is_node = true;
     }
 
-    struct ggml_tensor * result = ggml_new_tensor(ctx, type, a->n_dims, a->ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
 
     result->op   = GGML_OP_ADD;
-    result->grad = is_node ? ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne) : NULL;
+    result->grad = is_node ? ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, a->ne) : NULL;
     result->src[0] = a;
     result->src[1] = b;
 
@@ -3607,12 +3619,12 @@ struct ggml_tensor * ggml_sum_rows(
         is_node = true;
     }
 
-    int64_t ne[4] = {1,1,1,1};
-    for (int i=1; i<a->n_dims; ++i) {
+    int64_t ne[GGML_MAX_DIMS] = { 1 };
+    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
         ne[i] = a->ne[i];
     }
 
-    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, a->n_dims, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
 
     result->op   = GGML_OP_SUM_ROWS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -3633,8 +3645,8 @@ struct ggml_tensor * ggml_mean(
         is_node = true;
     }
 
-    int64_t ne[GGML_MAX_DIMS] = { 1, a->ne[1], a->ne[2], a->ne[3] };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, ne);
+    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     result->op   = GGML_OP_MEAN;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -3656,8 +3668,7 @@ struct ggml_tensor * ggml_argmax(
         is_node = true;
     }
 
-    int64_t ne[GGML_MAX_DIMS] = { a->ne[1], 1, 1, 1 };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, ne);
+    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
 
     result->op   = GGML_OP_ARGMAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -3680,7 +3691,7 @@ struct ggml_tensor * ggml_repeat(
         is_node = true;
     }
 
-    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
 
     result->op   = GGML_OP_REPEAT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -3707,7 +3718,7 @@ struct ggml_tensor * ggml_repeat_back(
         return a;
     }
 
-    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
 
     result->op   = GGML_OP_REPEAT_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -4083,7 +4094,7 @@ struct ggml_tensor * ggml_mul_mat(
     }
 
     const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(a->n_dims, b->n_dims), ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     result->op   = GGML_OP_MUL_MAT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -4117,7 +4128,7 @@ struct ggml_tensor * ggml_mul_mat_id(
     }
 
     const int64_t ne[4] = { as[0]->ne[1], b->ne[1], b->ne[2], b->ne[3] };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(as[0]->n_dims, b->n_dims), ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     ggml_set_op_params_i32(result, 0, id);
     ggml_set_op_params_i32(result, 1, n_as);
@@ -4155,7 +4166,7 @@ struct ggml_tensor * ggml_out_prod(
 
     // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(a->n_dims, b->n_dims), ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     result->op   = GGML_OP_OUT_PROD;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -4440,7 +4451,7 @@ struct ggml_tensor * ggml_reshape(
         //GGML_ASSERT(false);
     }
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a, 0);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -4818,7 +4829,7 @@ struct ggml_tensor * ggml_diag(
     }
 
     const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, MAX(a->n_dims, 2), ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
 
     result->op   = GGML_OP_DIAG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -5465,7 +5476,7 @@ struct ggml_tensor * ggml_pool_1d(
         is_node = true;
     }
 
-    const int64_t ne[3] = {
+    const int64_t ne[2] = {
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         a->ne[1],
     };
@@ -5584,7 +5595,7 @@ struct ggml_tensor * ggml_argsort(
         enum ggml_sort_order  order) {
     bool is_node = false;
 
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, a->ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
 
     ggml_set_op_params_i32(result, 0, (int32_t) order);
 
@@ -5631,7 +5642,7 @@ struct ggml_tensor * ggml_flash_attn(
     }
 
     //struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, q->ne);
 
     int32_t t = masked ? 1 : 0;
     ggml_set_op_params(result, &t, sizeof(t));
@@ -5664,7 +5675,7 @@ struct ggml_tensor * ggml_flash_ff(
     }
 
     //struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, a->ne);
 
     result->op   = GGML_OP_FLASH_FF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -5780,7 +5791,6 @@ struct ggml_tensor * ggml_win_part(
     const int np  = npx*npy;
 
     const int64_t ne[4] = { a->ne[0], w, w, np, };
-
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     int32_t params[] = { npx, npy, w };
@@ -14563,7 +14573,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
         return replacements->vals[i];
     }
 
-    struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, node->n_dims, node->ne);
+    struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, GGML_MAX_DIMS, node->ne);
 
     // insert clone into replacements
     GGML_ASSERT(replacements->set.keys[i] == NULL); // assert that we don't overwrite
@@ -16564,7 +16574,7 @@ static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fou
     fprintf(fout, "%-6s %-12s %8d %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %16zu %16zu %16zu %16zu %16p %32s\n",
             ggml_type_name(tensor->type),
             ggml_op_name  (tensor->op),
-            tensor->n_dims,
+            ggml_n_dims(tensor),
            ne[0], ne[1], ne[2], ne[3],
             nb[0], nb[1], nb[2], nb[3],
             tensor->data,
@@ -16579,7 +16589,7 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
             arg,
             ggml_type_name(tensor->type),
             ggml_op_name  (tensor->op),
-            tensor->n_dims,
+            ggml_n_dims(tensor),
             ne[0], ne[1], ne[2], ne[3],
             nb[0], nb[1], nb[2], nb[3],
             tensor->data,
@@ -16669,11 +16679,9 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 
             const uint32_t type   = tensor->type;
             const uint32_t op     = tensor->op;
-            const uint32_t n_dims = tensor->n_dims;
 
             fwrite(&type,   sizeof(uint32_t), 1, fout);
             fwrite(&op,     sizeof(uint32_t), 1, fout);
-            fwrite(&n_dims, sizeof(uint32_t), 1, fout);
 
             for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                 const uint64_t ne = tensor->ne[j];
@@ -16703,11 +16711,9 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
 
             const uint32_t type   = tensor->type;
             const uint32_t op     = tensor->op;
-            const uint32_t n_dims = tensor->n_dims;
 
             fwrite(&type,   sizeof(uint32_t), 1, fout);
             fwrite(&op,     sizeof(uint32_t), 1, fout);
-            fwrite(&n_dims, sizeof(uint32_t), 1, fout);
 
             for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                 const uint64_t ne = tensor->ne[j];
@@ -16879,12 +16885,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
         {
             uint32_t type;
             uint32_t op;
-            uint32_t n_dims;
 
             for (uint32_t i = 0; i < n_leafs; ++i) {
                 type   = *(const uint32_t *) ptr; ptr += sizeof(type);
                 op     = *(const uint32_t *) ptr; ptr += sizeof(op);
-                n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
 
                 int64_t ne[GGML_MAX_DIMS];
                 size_t  nb[GGML_MAX_DIMS];
@@ -16900,7 +16904,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
                     nb[j] = nb_cur;
                 }
 
-                struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
+                struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
 
                 tensor->op = (enum ggml_op) op;
 
@@ -16917,7 +16921,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
 
                 ptr += ggml_nbytes(tensor);
 
-                fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
+                fprintf(stderr, "%s: loaded leaf %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
             }
         }
 
@@ -16927,12 +16931,10 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
         {
             uint32_t type;
             uint32_t op;
-            uint32_t n_dims;
 
             for (uint32_t i = 0; i < n_nodes; ++i) {
                 type   = *(const uint32_t *) ptr; ptr += sizeof(type);
                 op     = *(const uint32_t *) ptr; ptr += sizeof(op);
-                n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
 
                 enum ggml_op eop = (enum ggml_op) op;
 
@@ -17003,7 +17005,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
                         } break;
                     default:
                         {
-                            tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
+                            tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, GGML_MAX_DIMS, ne);
 
                             tensor->op = eop;
                         } break;
@@ -17022,7 +17024,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
 
                 result->nodes[i] = tensor;
 
-                fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
+                fprintf(stderr, "%s: loaded node %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
             }
         }
     }
@@ -17160,7 +17162,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
             fprintf(fp, "(%s)|", ggml_type_name(node->type));
         }
 
-        if (node->n_dims == 2) {
+        if (ggml_is_matrix(node)) {
             fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
         } else {
             fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
@@ -17427,7 +17429,7 @@ static enum ggml_opt_result ggml_opt_adam(
         int64_t i = 0;
         for (int p = 0; p < np; ++p) {
             const int64_t ne = ggml_nelements(ps[p]);
-            const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
+            const float p_decay = ((ggml_n_dims(ps[p]) >= decay_min_ndim) ? decay : 0.0f) * sched;
             for (int64_t j = 0; j < ne; ++j) {
                 float x  = ggml_get_f32_1d(ps[p], j);
                 float g_ = g[i]*gnorm;
@@ -19205,8 +19207,8 @@ void gguf_add_tensor(
         ctx->infos[idx].ne[i] = 1;
     }
 
-    ctx->infos[idx].n_dims = tensor->n_dims;
-    for (int i = 0; i < tensor->n_dims; i++) {
+    ctx->infos[idx].n_dims = ggml_n_dims(tensor);
+    for (uint32_t i = 0; i < ctx->infos[idx].n_dims; i++) {
         ctx->infos[idx].ne[i] = tensor->ne[i];
     }
 
diff --git a/ggml.h b/ggml.h
index ae8101fab..84d6ba8b1 100644
--- a/ggml.h
+++ b/ggml.h
@@ -502,7 +502,6 @@ extern "C" {
 
         struct ggml_backend_buffer * buffer;
 
-        int     n_dims;
         int64_t ne[GGML_MAX_DIMS]; // number of elements
         size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
                                    // nb[0] = ggml_type_size(type)
@@ -534,7 +533,7 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
-        char padding[12];
+        char padding[8];
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -666,6 +665,11 @@ extern "C" {
     GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
     GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_scalar    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_vector    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_matrix    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_3d        (const struct ggml_tensor * tensor);
+    GGML_API int  ggml_n_dims       (const struct ggml_tensor * tensor); // returns 1 for scalars
 
     GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
 
diff --git a/llama.cpp b/llama.cpp
index 456807d9d..eddb70859 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8471,7 +8471,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         bool quantize = name.rfind("weight") == name.size() - 6; // ends with 'weight'?
 
         // quantize only 2D tensors
-        quantize &= (tensor->n_dims == 2);
+        quantize &= (ggml_n_dims(tensor) == 2);
 
         quantize &= params->quantize_output_tensor || name != "output.weight";
         quantize &= !params->only_copy;
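Note (not part of the patch): after this change the dimension count is derived from ne[] on demand instead of being stored, so a tensor created as 4x1 with ggml_new_tensor_2d() reports 1 from ggml_n_dims(), where it previously carried n_dims == 2. The standalone sketch below reproduces the helper logic from the patch so the new semantics can be checked without building ggml; the local names n_dims and is_matrix are stand-ins for the real ggml_n_dims() and ggml_is_matrix(), and the raw int64_t array stands in for ggml_tensor::ne.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GGML_MAX_DIMS 4

/* same loop as ggml_n_dims() in the patch: trailing size-1 dimensions do not count */
static int n_dims(const int64_t ne[GGML_MAX_DIMS]) {
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
        if (ne[i] > 1) {
            return i + 1;
        }
    }
    return 1;
}

/* same check as ggml_is_matrix() in the patch: an "at most 2-D" test */
static bool is_matrix(const int64_t ne[GGML_MAX_DIMS]) {
    return ne[2] == 1 && ne[3] == 1;
}

int main(void) {
    const int64_t a[GGML_MAX_DIMS] = { 4, 1, 1, 1 }; /* created as 4x1 */
    const int64_t b[GGML_MAX_DIMS] = { 4, 3, 2, 1 };

    printf("n_dims(4x1x1x1) = %d\n", n_dims(a));       /* 1, no longer 2 */
    printf("n_dims(4x3x2x1) = %d\n", n_dims(b));       /* 3 */
    printf("is_matrix(4x1x1x1) = %d\n", is_matrix(a)); /* 1: vectors and scalars pass too */
    return 0;
}

This is also why the assert_shape_* helpers in common/train.cpp switch from checking an exact stored rank to asserting that the unused extents equal 1: with n_dims gone, "is 2-D" can only be expressed as ne[2] == 1 && ne[3] == 1.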