diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 6823adc6c..b8c98354d 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -3962,18 +3962,23 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_ } func = ggml_cuda_mul; break; - case GGML_OP_GELU: - if (!any_on_device) { - return false; - } - func = ggml_cuda_gelu; - break; - case GGML_OP_SILU: - if (!any_on_device) { - return false; - } - func = ggml_cuda_silu; - break; + case GGML_OP_UNARY: + switch (ggml_get_unary_op(tensor)) { + case GGML_UNARY_OP_GELU: + if (!any_on_device) { + return false; + } + func = ggml_cuda_gelu; + break; + case GGML_UNARY_OP_SILU: + if (!any_on_device) { + return false; + } + func = ggml_cuda_silu; + break; + default: + return false; + } break; case GGML_OP_NORM: if (!any_on_device) { return false; diff --git a/ggml-metal.m b/ggml-metal.m index bf3f68fe4..1fd6e857f 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -519,48 +519,56 @@ void ggml_metal_graph_compute( [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; } break; - case GGML_OP_SILU: - { - if (encoder == nil) { - encoder = [command_buffer computeCommandEncoder]; - } + case GGML_OP_UNARY: + switch (ggml_get_unary_op(gf->nodes[i])) { + case GGML_UNARY_OP_SILU: + { + if (encoder == nil) { + encoder = [command_buffer computeCommandEncoder]; + } - [encoder setComputePipelineState:ctx->pipeline_silu]; - [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + [encoder setComputePipelineState:ctx->pipeline_silu]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; - const int64_t n = ggml_nelements(dst); + const int64_t n = ggml_nelements(dst); - [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_RELU: + { + if (encoder == nil) { + encoder = [command_buffer computeCommandEncoder]; + } + + [encoder setComputePipelineState:ctx->pipeline_relu]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_GELU: + { + if (encoder == nil) { + encoder = [command_buffer computeCommandEncoder]; + } + + [encoder setComputePipelineState:ctx->pipeline_gelu]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + default: + { + fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); + GGML_ASSERT(false); + } } break; - case GGML_OP_RELU: - { - if (encoder == nil) { - encoder = [command_buffer computeCommandEncoder]; - } - - [encoder setComputePipelineState:ctx->pipeline_relu]; - [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; - - const int64_t n = ggml_nelements(dst); - - [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; - } break; - case GGML_OP_GELU: - { - if (encoder == nil) { - encoder = [command_buffer computeCommandEncoder]; - } - - [encoder setComputePipelineState:ctx->pipeline_gelu]; - [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; - - const int64_t n = ggml_nelements(dst); - - [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; - } break; case GGML_OP_SOFT_MAX: { if (encoder == nil) { @@ -979,8 +987,10 @@ void ggml_metal_graph_compute( [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; } break; default: - fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); - GGML_ASSERT(false); + { + fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); + GGML_ASSERT(false); + } } } diff --git a/ggml.c b/ggml.c index 9ee4a8d7f..960b80577 100644 --- a/ggml.c +++ b/ggml.c @@ -3440,7 +3440,9 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float //inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; } inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { -#if defined(GGML_SIMD) +#if defined(GGML_USE_ACCELERATE) + vDSP_vsmul(y, 1, &v, y, 1, n); +#elif defined(GGML_SIMD) const int np = (n & ~(GGML_F32_STEP - 1)); GGML_F32_VEC vx = GGML_F32_VEC_SET1(v); @@ -3603,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) { #endif } -inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x) { +inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) { ggml_float sum = 0.0; for (int i = 0; i < n; ++i) { sum += (ggml_float)x[i]; @@ -3611,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x *s = sum; } +inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) { + float sum = 0.0f; + for (int i = 0; i < n; ++i) { + sum += GGML_FP16_TO_FP32(x[i]); + } + *s = sum; +} + inline static void ggml_vec_max_f32(const int n, float * s, const float * x) { #ifndef GGML_USE_ACCELERATE float max = -INFINITY; @@ -3750,16 +3760,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "ARGMAX", "REPEAT", "REPEAT_BACK", - "ABS", - "SGN", - "NEG", - "STEP", - "TANH", - "ELU", - "RELU", - "GELU", - "GELU_QUICK", - "SILU", "SILU_BACK", "NORM", "RMS_NORM", @@ -3798,6 +3798,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "WIN_PART", "WIN_UNPART", + "UNARY", + "MAP_UNARY", "MAP_BINARY", @@ -3809,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CROSS_ENTROPY_LOSS_BACK", }; -static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68"); +static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -3830,16 +3832,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "argmax(x)", "repeat(x)", "repeat_back(x)", - "abs(x)", - "sgn(x)", - "-x", - "step(x)", - "tanh(x)", - "elu(x)", - "relu(x)", - "gelu(x)", - "gelu_quick(x)", - "silu(x)", "silu_back(x)", "norm(x)", "rms_norm(x)", @@ -3878,6 +3870,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "win_part(x)", "win_unpart(x)", + "unary(x)", + "f(x)", "f(x,y)", @@ -3889,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "cross_entropy_loss_back(x,y)", }; -static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68"); +static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); @@ -4145,6 +4139,10 @@ const char * ggml_op_name(enum ggml_op op) { return GGML_OP_NAME[op]; } +const char * ggml_op_symbol(enum ggml_op op) { + return GGML_OP_SYMBOL[op]; +} + size_t ggml_element_size(const struct ggml_tensor * tensor) { return GGML_TYPE_SIZE[tensor->type]; } @@ -4443,6 +4441,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) return result; } +bool ggml_get_no_alloc(struct ggml_context * ctx) { + return ctx->no_alloc; +} + void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) { ctx->no_alloc = no_alloc; } @@ -4480,7 +4482,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { // this is an error prone process, but it is necessary to support inplace // operators when using scratch buffers // TODO: implement a better way -void ggml_scratch_save(struct ggml_context * ctx) { +static void ggml_scratch_save(struct ggml_context * ctx) { // this is needed to allow opt tensors to store their data // TODO: again, need to find a better way ctx->no_alloc_save = ctx->no_alloc; @@ -4490,7 +4492,7 @@ void ggml_scratch_save(struct ggml_context * ctx) { ctx->scratch.data = NULL; } -void ggml_scratch_load(struct ggml_context * ctx) { +static void ggml_scratch_load(struct ggml_context * ctx) { ctx->no_alloc = ctx->no_alloc_save; ctx->scratch = ctx->scratch_save; @@ -4498,7 +4500,7 @@ void ggml_scratch_load(struct ggml_context * ctx) { //////////////////////////////////////////////////////////////////////////////// -struct ggml_tensor * ggml_new_tensor_impl( +static struct ggml_tensor * ggml_new_tensor_impl( struct ggml_context * ctx, enum ggml_type type, int n_dims, @@ -4621,6 +4623,21 @@ struct ggml_tensor * ggml_new_tensor_impl( return result; } +static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { + assert(params_size <= GGML_MAX_OP_PARAMS); + memcpy(tensor->op_params, params, params_size); +} + +static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) { + assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); + return ((const int32_t *)(tensor->op_params))[i]; +} + +static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) { + assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); + ((int32_t *)(tensor->op_params))[i] = value; +} + struct ggml_tensor * ggml_new_tensor( struct ggml_context * ctx, enum ggml_type type, @@ -4952,6 +4969,16 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) { return (float *)(tensor->data); } +enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) { + GGML_ASSERT(tensor->op == GGML_OP_UNARY); + return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0); +} + +static void ggml_set_unary_op(struct ggml_tensor * tensor, enum ggml_unary_op op) { + GGML_ASSERT(tensor->op = GGML_OP_UNARY); + ggml_set_op_params_i32(tensor, 0, (int32_t) op); +} + const char * ggml_get_name(const struct ggml_tensor * tensor) { return tensor->name; } @@ -4970,11 +4997,6 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * return tensor; } -static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { - assert(params_size <= GGML_MAX_OP_PARAMS); - memcpy(tensor->op_params, params, params_size); -} - struct ggml_tensor * ggml_view_tensor( struct ggml_context * ctx, const struct ggml_tensor * src) { @@ -5010,7 +5032,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam // ggml_dup -struct ggml_tensor * ggml_dup_impl( +static struct ggml_tensor * ggml_dup_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5043,7 +5065,7 @@ struct ggml_tensor * ggml_dup_inplace( // ggml_add -struct ggml_tensor * ggml_add_impl( +static struct ggml_tensor * ggml_add_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5086,7 +5108,7 @@ struct ggml_tensor * ggml_add_inplace( // ggml_add1 -struct ggml_tensor * ggml_add1_impl( +static struct ggml_tensor * ggml_add1_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5126,7 +5148,7 @@ struct ggml_tensor * ggml_add1_inplace( // ggml_acc -struct ggml_tensor * ggml_acc_impl( +static struct ggml_tensor * ggml_acc_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5183,7 +5205,7 @@ struct ggml_tensor * ggml_acc_inplace( // ggml_sub -struct ggml_tensor * ggml_sub_impl( +static struct ggml_tensor * ggml_sub_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5222,7 +5244,7 @@ struct ggml_tensor * ggml_sub_inplace( // ggml_mul -struct ggml_tensor * ggml_mul_impl( +static struct ggml_tensor * ggml_mul_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5269,7 +5291,7 @@ struct ggml_tensor * ggml_mul_inplace( // ggml_div -struct ggml_tensor * ggml_div_impl( +static struct ggml_tensor * ggml_div_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -5312,7 +5334,7 @@ struct ggml_tensor * ggml_div_inplace( // ggml_sqr -struct ggml_tensor * ggml_sqr_impl( +static struct ggml_tensor * ggml_sqr_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5345,7 +5367,7 @@ struct ggml_tensor * ggml_sqr_inplace( // ggml_sqrt -struct ggml_tensor * ggml_sqrt_impl( +static struct ggml_tensor * ggml_sqrt_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5379,7 +5401,7 @@ struct ggml_tensor * ggml_sqrt_inplace( // ggml_log -struct ggml_tensor * ggml_log_impl( +static struct ggml_tensor * ggml_log_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5559,333 +5581,142 @@ struct ggml_tensor * ggml_repeat_back( // ggml_abs -struct ggml_tensor * ggml_abs_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_ABS; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_abs( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_abs_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_ABS); } struct ggml_tensor * ggml_abs_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_abs_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS); } - // ggml_sgn -struct ggml_tensor * ggml_sgn_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_SGN; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_sgn_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_SGN); } struct ggml_tensor * ggml_sgn_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_sgn_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN); } // ggml_neg -struct ggml_tensor * ggml_neg_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_NEG; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_neg( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_neg_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_NEG); } struct ggml_tensor * ggml_neg_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_neg_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG); } // ggml_step -struct ggml_tensor * ggml_step_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_STEP; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_step( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_step_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_STEP); } struct ggml_tensor * ggml_step_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_step_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP); } // ggml_tanh -struct ggml_tensor * ggml_tanh_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_TANH; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_tanh( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_tanh_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_TANH); } struct ggml_tensor * ggml_tanh_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_tanh_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH); } // ggml_elu -struct ggml_tensor * ggml_elu_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_ELU; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_elu( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_elu_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_ELU); } struct ggml_tensor * ggml_elu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_elu_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU); } // ggml_relu -struct ggml_tensor * ggml_relu_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_RELU; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_relu( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_relu_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_RELU); } struct ggml_tensor * ggml_relu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_relu_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU); } // ggml_gelu -struct ggml_tensor * ggml_gelu_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_GELU; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_gelu( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_gelu_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_GELU); } struct ggml_tensor * ggml_gelu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_gelu_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU); } // ggml_gelu_quick -struct ggml_tensor * ggml_gelu_quick_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_GELU_QUICK; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_gelu_quick( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_gelu_quick_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK); } struct ggml_tensor * ggml_gelu_quick_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_gelu_quick_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK); } // ggml_silu -struct ggml_tensor * ggml_silu_impl( - struct ggml_context * ctx, - struct ggml_tensor * a, - bool inplace) { - bool is_node = false; - - if (!inplace && (a->grad)) { - is_node = true; - } - - struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - - result->op = GGML_OP_SILU; - result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; - result->src[0] = a; - - return result; -} - struct ggml_tensor * ggml_silu( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_silu_impl(ctx, a, false); + return ggml_unary(ctx, a, GGML_UNARY_OP_SILU); } struct ggml_tensor * ggml_silu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { - return ggml_silu_impl(ctx, a, true); + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU); } // ggml_silu_back @@ -5913,7 +5744,7 @@ struct ggml_tensor * ggml_silu_back( // ggml_norm -struct ggml_tensor * ggml_norm_impl( +static struct ggml_tensor * ggml_norm_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -5947,7 +5778,7 @@ struct ggml_tensor * ggml_norm_inplace( return ggml_norm_impl(ctx, a, true); } -struct ggml_tensor * ggml_rms_norm_impl( +static struct ggml_tensor * ggml_rms_norm_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -6056,7 +5887,7 @@ struct ggml_tensor * ggml_out_prod( // ggml_scale -struct ggml_tensor * ggml_scale_impl( +static struct ggml_tensor * ggml_scale_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6096,7 +5927,7 @@ struct ggml_tensor * ggml_scale_inplace( // ggml_set -struct ggml_tensor * ggml_set_impl( +static struct ggml_tensor * ggml_set_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6186,7 +6017,7 @@ struct ggml_tensor * ggml_set_2d_inplace( // ggml_cpy -struct ggml_tensor * ggml_cpy_impl( +static struct ggml_tensor * ggml_cpy_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6231,7 +6062,7 @@ struct ggml_tensor * ggml_cpy_inplace( // ggml_cont -struct ggml_tensor * ggml_cont_impl( +static struct ggml_tensor * ggml_cont_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -6701,7 +6532,7 @@ struct ggml_tensor * ggml_diag( // ggml_diag_mask_inf -struct ggml_tensor * ggml_diag_mask_inf_impl( +static struct ggml_tensor * ggml_diag_mask_inf_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, @@ -6741,7 +6572,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace( // ggml_diag_mask_zero -struct ggml_tensor * ggml_diag_mask_zero_impl( +static struct ggml_tensor * ggml_diag_mask_zero_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, @@ -6780,7 +6611,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace( // ggml_soft_max -struct ggml_tensor * ggml_soft_max_impl( +static struct ggml_tensor * ggml_soft_max_impl( struct ggml_context * ctx, struct ggml_tensor * a, bool inplace) { @@ -6814,7 +6645,7 @@ struct ggml_tensor * ggml_soft_max_inplace( // ggml_soft_max_back -struct ggml_tensor * ggml_soft_max_back_impl( +static struct ggml_tensor * ggml_soft_max_back_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -6851,7 +6682,7 @@ struct ggml_tensor * ggml_soft_max_back_inplace( // ggml_rope -struct ggml_tensor * ggml_rope_impl( +static struct ggml_tensor * ggml_rope_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, @@ -7363,9 +7194,47 @@ struct ggml_tensor * ggml_win_unpart( return result; } +// gmml_unary + +static struct ggml_tensor * ggml_unary_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + ggml_set_unary_op(result, op); + + result->op = GGML_OP_UNARY; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src[0] = a; + + return result; +} + +struct ggml_tensor * ggml_unary( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op) { + return ggml_unary_impl(ctx, a, op, false); +} + +struct ggml_tensor * ggml_unary_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op) { + return ggml_unary_impl(ctx, a, op, true); +} + // ggml_map_unary -struct ggml_tensor * ggml_map_unary_impl_f32( +static struct ggml_tensor * ggml_map_unary_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, const ggml_unary_op_f32_t fun, @@ -7403,7 +7272,7 @@ struct ggml_tensor * ggml_map_unary_inplace_f32( // ggml_map_binary -struct ggml_tensor * ggml_map_binary_impl_f32( +static struct ggml_tensor * ggml_map_binary_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -7447,7 +7316,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32( // ggml_map_custom1 -struct ggml_tensor * ggml_map_custom1_impl_f32( +static struct ggml_tensor * ggml_map_custom1_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, const ggml_custom1_op_f32_t fun, @@ -7485,7 +7354,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32( // ggml_map_custom2 -struct ggml_tensor * ggml_map_custom2_impl_f32( +static struct ggml_tensor * ggml_map_custom2_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -7527,7 +7396,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32( // ggml_map_custom3 -struct ggml_tensor * ggml_map_custom3_impl_f32( +static struct ggml_tensor * ggml_map_custom3_impl_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, @@ -9292,7 +9161,7 @@ static void ggml_compute_forward_sum_f32( for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { - ggml_vec_sum_ggf(ne00, + ggml_vec_sum_f32_ggf(ne00, &row_sum, (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03)); sum += row_sum; @@ -9302,6 +9171,38 @@ static void ggml_compute_forward_sum_f32( ((float *) dst->data)[0] = sum; } +static void ggml_compute_forward_sum_f16( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(ggml_is_scalar(dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + assert(src0->nb[0] == sizeof(ggml_fp16_t)); + + GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); + GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); + + float sum = 0; + float row_sum = 0; + + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = 0; i01 < ne01; i01++) { + ggml_vec_sum_f16_ggf(ne00, + &row_sum, + (ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03)); + sum += row_sum; + } + } + } + ((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum); +} + static void ggml_compute_forward_sum( const struct ggml_compute_params * params, const struct ggml_tensor * src0, @@ -9311,6 +9212,10 @@ static void ggml_compute_forward_sum( { ggml_compute_forward_sum_f32(params, src0, dst); } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_sum_f16(params, src0, dst); + } break; default: { GGML_ASSERT(false); @@ -10077,7 +9982,6 @@ static void ggml_compute_forward_silu( } } - // ggml_compute_forward_silu_back static void ggml_compute_forward_silu_back_f32( @@ -14122,6 +14026,62 @@ static void ggml_compute_forward_win_unpart( } } +//gmml_compute_forward_unary + +static void ggml_compute_forward_unary( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + const enum ggml_unary_op op = ggml_get_unary_op(dst); + + switch (op) { + case GGML_UNARY_OP_ABS: + { + ggml_compute_forward_abs(params, src0, dst); + } break; + case GGML_UNARY_OP_SGN: + { + ggml_compute_forward_sgn(params, src0, dst); + } break; + case GGML_UNARY_OP_NEG: + { + ggml_compute_forward_neg(params, src0, dst); + } break; + case GGML_UNARY_OP_STEP: + { + ggml_compute_forward_step(params, src0, dst); + } break; + case GGML_UNARY_OP_TANH: + { + ggml_compute_forward_tanh(params, src0, dst); + } break; + case GGML_UNARY_OP_ELU: + { + ggml_compute_forward_elu(params, src0, dst); + } break; + case GGML_UNARY_OP_RELU: + { + ggml_compute_forward_relu(params, src0, dst); + } break; + case GGML_UNARY_OP_GELU: + { + ggml_compute_forward_gelu(params, src0, dst); + } break; + case GGML_UNARY_OP_GELU_QUICK: + { + ggml_compute_forward_gelu_quick(params, src0, dst); + } break; + case GGML_UNARY_OP_SILU: + { + ggml_compute_forward_silu(params, src0, dst); + } break; + default: + { + GGML_ASSERT(false); + } break; + } +} + // ggml_compute_forward_map_unary static void ggml_compute_forward_map_unary_f32( @@ -14682,46 +14642,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_repeat_back(params, tensor->src[0], tensor); } break; - case GGML_OP_ABS: - { - ggml_compute_forward_abs(params, tensor->src[0], tensor); - } break; - case GGML_OP_SGN: - { - ggml_compute_forward_sgn(params, tensor->src[0], tensor); - } break; - case GGML_OP_NEG: - { - ggml_compute_forward_neg(params, tensor->src[0], tensor); - } break; - case GGML_OP_STEP: - { - ggml_compute_forward_step(params, tensor->src[0], tensor); - } break; - case GGML_OP_TANH: - { - ggml_compute_forward_tanh(params, tensor->src[0], tensor); - } break; - case GGML_OP_ELU: - { - ggml_compute_forward_elu(params, tensor->src[0], tensor); - } break; - case GGML_OP_RELU: - { - ggml_compute_forward_relu(params, tensor->src[0], tensor); - } break; - case GGML_OP_GELU: - { - ggml_compute_forward_gelu(params, tensor->src[0], tensor); - } break; - case GGML_OP_GELU_QUICK: - { - ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor); - } break; - case GGML_OP_SILU: - { - ggml_compute_forward_silu(params, tensor->src[0], tensor); - } break; case GGML_OP_SILU_BACK: { ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor); @@ -14864,6 +14784,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_win_unpart(params, tensor->src[0], tensor); } break; + case GGML_OP_UNARY: + { + ggml_compute_forward_unary(params, tensor->src[0], tensor); + } break; case GGML_OP_MAP_UNARY: { ggml_unary_op_f32_t fun; @@ -15112,73 +15036,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor inplace); } } break; - case GGML_OP_ABS: - { - if (src0->grad) { - src0->grad = - ggml_add_impl(ctx, - src0->grad, - ggml_mul(ctx, - ggml_sgn(ctx, src0), - tensor->grad), - inplace); - } - } break; - case GGML_OP_SGN: - { - if (src0->grad) { - // noop - } - } break; - case GGML_OP_NEG: - { - if (src0->grad) { - src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace); - } - } break; - case GGML_OP_STEP: - { - if (src0->grad) { - // noop - } - } break; - case GGML_OP_TANH: - { - GGML_ASSERT(false); // TODO: not implemented - } break; - case GGML_OP_ELU: - { - GGML_ASSERT(false); // TODO: not implemented - } break; - case GGML_OP_RELU: - { - if (src0->grad) { - src0->grad = ggml_sub_impl(ctx, - src0->grad, - ggml_mul(ctx, - ggml_step(ctx, src0), - tensor->grad), - inplace); - } - } break; - case GGML_OP_GELU: - { - GGML_ASSERT(false); // TODO: not implemented - } break; - case GGML_OP_GELU_QUICK: - { - GGML_ASSERT(false); // TODO: not implemented - } break; - case GGML_OP_SILU: - { - // necessary for llama - if (src0->grad) { - src0->grad = ggml_add_impl(ctx, - src0->grad, - ggml_silu_back(ctx, src0, tensor->grad), - inplace); - } - } break; case GGML_OP_SILU_BACK: { GGML_ASSERT(false); // TODO: not implemented @@ -15440,9 +15297,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false), inplace); } - if (src1->grad) { - // noop - } } break; case GGML_OP_DIAG_MASK_ZERO: { @@ -15454,9 +15308,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false), inplace); } - if (src1->grad) { - // noop - } } break; case GGML_OP_SOFT_MAX: { @@ -15491,9 +15342,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor n_ctx), inplace); } - if (src1->grad) { - // noop - } } break; case GGML_OP_ROPE_BACK: { @@ -15512,9 +15360,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor n_ctx), inplace); } - if (src1->grad) { - // noop - } } break; case GGML_OP_ALIBI: { @@ -15707,6 +15552,80 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor } break; case GGML_OP_WIN_PART: case GGML_OP_WIN_UNPART: + case GGML_OP_UNARY: + { + switch (ggml_get_unary_op(tensor)) { + case GGML_UNARY_OP_ABS: + { + if (src0->grad) { + src0->grad = + ggml_add_impl(ctx, + src0->grad, + ggml_mul(ctx, + ggml_sgn(ctx, src0), + tensor->grad), + inplace); + } + } break; + case GGML_UNARY_OP_SGN: + { + if (src0->grad) { + // noop + } + } break; + case GGML_UNARY_OP_NEG: + { + if (src0->grad) { + src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace); + } + } break; + case GGML_UNARY_OP_STEP: + { + if (src0->grad) { + // noop + } + } break; + case GGML_UNARY_OP_TANH: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_UNARY_OP_ELU: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_UNARY_OP_RELU: + { + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_mul(ctx, + ggml_step(ctx, src0), + tensor->grad), + inplace); + } + } break; + case GGML_UNARY_OP_GELU: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_UNARY_OP_GELU_QUICK: + { + GGML_ASSERT(false); // TODO: not implemented + } break; + case GGML_UNARY_OP_SILU: + { + // necessary for llama + if (src0->grad) { + src0->grad = ggml_add_impl(ctx, + src0->grad, + ggml_silu_back(ctx, src0, tensor->grad), + inplace); + } + } break; + default: + GGML_ASSERT(false); + } + } break; case GGML_OP_MAP_UNARY: case GGML_OP_MAP_BINARY: case GGML_OP_MAP_CUSTOM1: @@ -15937,7 +15856,7 @@ typedef pthread_t ggml_thread_t; // Android's libc implementation "bionic" does not support setting affinity #if defined(__linux__) && !defined(__BIONIC__) -void set_numa_thread_affinity(int thread_n, int n_threads) { +static void set_numa_thread_affinity(int thread_n, int n_threads) { if (!ggml_is_numa()) { return; } @@ -15962,7 +15881,7 @@ void set_numa_thread_affinity(int thread_n, int n_threads) { CPU_FREE(cpus); } -void clear_numa_thread_affinity(void) { +static void clear_numa_thread_affinity(void) { if (!ggml_is_numa()) { return; } @@ -15986,8 +15905,8 @@ void clear_numa_thread_affinity(void) { #else // TODO: Windows etc. // (the linux implementation may also work on BSD, someone should test) -void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); } -void clear_numa_thread_affinity(void) {} +static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); } +static void clear_numa_thread_affinity(void) {} #endif struct ggml_compute_state_shared { @@ -16199,21 +16118,34 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { case GGML_OP_ARGMAX: case GGML_OP_REPEAT: case GGML_OP_REPEAT_BACK: - case GGML_OP_ABS: - case GGML_OP_SGN: - case GGML_OP_NEG: - case GGML_OP_STEP: - case GGML_OP_TANH: - case GGML_OP_ELU: - case GGML_OP_RELU: - { + { n_tasks = 1; } break; - case GGML_OP_MUL: - case GGML_OP_GELU: - case GGML_OP_GELU_QUICK: - case GGML_OP_SILU: + + case GGML_OP_UNARY: + { + switch (ggml_get_unary_op(node)) { + case GGML_UNARY_OP_ABS: + case GGML_UNARY_OP_SGN: + case GGML_UNARY_OP_NEG: + case GGML_UNARY_OP_STEP: + case GGML_UNARY_OP_TANH: + case GGML_UNARY_OP_ELU: + case GGML_UNARY_OP_RELU: + { + n_tasks = 1; + } break; + + case GGML_UNARY_OP_GELU: + case GGML_UNARY_OP_GELU_QUICK: + case GGML_UNARY_OP_SILU: + { + n_tasks = n_threads; + } break; + } + } break; case GGML_OP_SILU_BACK: + case GGML_OP_MUL: case GGML_OP_NORM: case GGML_OP_RMS_NORM: case GGML_OP_RMS_NORM_BACK: @@ -16728,7 +16660,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) { fwrite(&nb, sizeof(uint64_t), 1, fout); } - fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout); // dump the data // TODO: pad this to 32 byte boundary @@ -16761,7 +16694,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) { fwrite(&nb, sizeof(uint64_t), 1, fout); } - fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout); + fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout); // output the op arguments { @@ -16942,7 +16876,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** tensor->op = (enum ggml_op) op; - memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME; + memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME; + memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS; tensor->data = (void *) ptr; @@ -16987,7 +16922,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** nb[j] = nb_cur; } - const char * ptr_name = ptr; ptr += GGML_MAX_NAME; + const char * ptr_name = ptr; ptr += GGML_MAX_NAME; + const char * ptr_op_params = ptr; ptr += GGML_MAX_OP_PARAMS; const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t); @@ -17024,8 +16960,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** { tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0); - uint64_t offs; - memcpy(&offs, tensor->op_params, sizeof(offs)); + size_t offs; + memcpy(&offs, ptr_op_params, sizeof(offs)); tensor->data = ((char *) tensor->data) + offs; } break; @@ -17045,7 +16981,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** } break; } - memcpy(tensor->name, ptr_name, GGML_MAX_NAME); + memcpy(tensor->name, ptr_name, GGML_MAX_NAME); + memcpy(tensor->op_params, ptr_op_params, GGML_MAX_OP_PARAMS); for (int j = 0; j < GGML_MAX_DIMS; ++j) { tensor->nb[j] = nb[j]; @@ -17079,7 +17016,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n", i, node->ne[0], node->ne[1], node->ne[2], - GGML_OP_NAME[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs, + ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs, (double) node->perf_cycles / (double) ggml_cycles_per_ms(), (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs, (double) node->perf_time_us / 1000.0, @@ -17093,7 +17030,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n", i, node->ne[0], node->ne[1], - GGML_OP_NAME[node->op]); + ggml_op_name(node->op)); } for (int i = 0; i < GGML_OP_COUNT; i++) { @@ -17101,7 +17038,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { continue; } - GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", GGML_OP_NAME[i], (double) perf_total_per_op_us[i] / 1000.0); + GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0); } GGML_PRINT("========================================\n"); @@ -17195,13 +17132,13 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph } if (node->n_dims == 2) { - fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], GGML_OP_SYMBOL[node->op]); + fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op)); } else { - fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], node->ne[2], GGML_OP_SYMBOL[node->op]); + fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | %s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op)); } if (node->grad) { - fprintf(fp, " | %s\"; ]\n", GGML_OP_SYMBOL[node->grad->op]); + fprintf(fp, " | %s\"; ]\n", ggml_op_symbol(node->grad->op)); } else { fprintf(fp, "\"; ]\n"); } diff --git a/ggml.h b/ggml.h index 871c85a89..de44fba9e 100644 --- a/ggml.h +++ b/ggml.h @@ -330,16 +330,6 @@ extern "C" { GGML_OP_ARGMAX, GGML_OP_REPEAT, GGML_OP_REPEAT_BACK, - GGML_OP_ABS, - GGML_OP_SGN, - GGML_OP_NEG, - GGML_OP_STEP, - GGML_OP_TANH, - GGML_OP_ELU, - GGML_OP_RELU, - GGML_OP_GELU, - GGML_OP_GELU_QUICK, - GGML_OP_SILU, GGML_OP_SILU_BACK, GGML_OP_NORM, // normalize GGML_OP_RMS_NORM, @@ -378,6 +368,8 @@ extern "C" { GGML_OP_WIN_PART, GGML_OP_WIN_UNPART, + GGML_OP_UNARY, + GGML_OP_MAP_UNARY, GGML_OP_MAP_BINARY, @@ -391,6 +383,18 @@ extern "C" { GGML_OP_COUNT, }; + enum ggml_unary_op { + GGML_UNARY_OP_ABS, + GGML_UNARY_OP_SGN, + GGML_UNARY_OP_NEG, + GGML_UNARY_OP_STEP, + GGML_UNARY_OP_TANH, + GGML_UNARY_OP_ELU, + GGML_UNARY_OP_RELU, + GGML_UNARY_OP_GELU, + GGML_UNARY_OP_GELU_QUICK, + GGML_UNARY_OP_SILU, + }; // ggml object struct ggml_object { @@ -535,6 +539,7 @@ extern "C" { GGML_API const char * ggml_type_name(enum ggml_type type); GGML_API const char * ggml_op_name (enum ggml_op op); + GGML_API const char * ggml_op_symbol(enum ggml_op op); GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor); @@ -558,6 +563,7 @@ extern "C" { GGML_API size_t ggml_used_mem(const struct ggml_context * ctx); GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch); + GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx); GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc); GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx); @@ -617,9 +623,11 @@ extern "C" { GGML_API void * ggml_get_data (const struct ggml_tensor * tensor); GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor); - GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor); - GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name); - GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...); + GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor); + + GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor); + GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name); + GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...); // // operations on tensors with backpropagation @@ -629,6 +637,11 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + // in-place, returns view(a) + GGML_API struct ggml_tensor * ggml_dup_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + GGML_API struct ggml_tensor * ggml_add( struct ggml_context * ctx, struct ggml_tensor * a, @@ -952,11 +965,22 @@ extern "C" { struct ggml_tensor * a, struct ggml_tensor * b); + // a -> b, in-place, return view(b) + GGML_API struct ggml_tensor * ggml_cpy_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + // make contiguous GGML_API struct ggml_tensor * ggml_cont( struct ggml_context * ctx, struct ggml_tensor * a); + // make contiguous, in-place + GGML_API struct ggml_tensor * ggml_cont_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); + // return view(a), b specifies the new shape // TODO: when we start computing gradient, make a copy instead of view GGML_API struct ggml_tensor * ggml_reshape( @@ -1268,6 +1292,16 @@ extern "C" { typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); + GGML_API struct ggml_tensor * ggml_unary( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op); + + GGML_API struct ggml_tensor * ggml_unary_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_unary_op op); + GGML_API struct ggml_tensor * ggml_map_unary_f32( struct ggml_context * ctx, struct ggml_tensor * a, diff --git a/tests/test-grad0.c b/tests/test-grad0.c index 01467bc18..ef20bce51 100644 --- a/tests/test-grad0.c +++ b/tests/test-grad0.c @@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) { } } -struct ggml_tensor * get_random_tensor( +struct ggml_tensor * get_random_tensor_f32( struct ggml_context * ctx0, int ndims, int64_t ne[], @@ -112,7 +112,55 @@ struct ggml_tensor * get_random_tensor( return result; } -struct ggml_tensor * get_random_tensor_int( +struct ggml_tensor * get_random_tensor_f16( + struct ggml_context * ctx0, + int ndims, + int64_t ne[], + float fmin, + float fmax) { + struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne); + + switch (ndims) { + case 1: + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + break; + case 2: + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + } + break; + default: + assert(false); + }; + + return result; +} + +struct ggml_tensor * get_random_tensor_i32( struct ggml_context * ctx0, int ndims, int64_t ne[], @@ -160,23 +208,6 @@ struct ggml_tensor * get_random_tensor_int( return result; } -float get_element(const struct ggml_tensor * t, int idx) { - if (t->type == GGML_TYPE_F32) { - return ((float *)t->data)[idx]; - } - - if (t->type == GGML_TYPE_I32) { - return ((int32_t *)t->data)[idx]; - } - - assert(false); - return INFINITY; -} - -void set_element(struct ggml_tensor * t, int idx, float value) { - ((float *)t->data)[idx] = value; -} - void print_elements(const char* label, const struct ggml_tensor * t) { if (!t) { printf("%s: %s = null\n", __func__, label); @@ -186,7 +217,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) { printf("%s: %s = [", __func__, label); for (int k = 0; k < nelements; ++k) { if (k > 0) { printf(", "); } - printf("%.5f", get_element(t, k)); + printf("%.5f", ggml_get_f32_1d(t, k)); } printf("] shape: ["); for (int k = 0; k < t->n_dims; ++k) { @@ -237,23 +268,23 @@ bool check_gradient( const int nelements = ggml_nelements(x[i]); for (int k = 0; k < nelements; ++k) { // compute gradient using finite differences - const float x0 = get_element(x[i], k); + const float x0 = ggml_get_f32_1d(x[i], k); const float xm = x0 - eps; const float xp = x0 + eps; - set_element(x[i], k, xp); + ggml_set_f32_1d(x[i], k, xp); ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); const float f0 = ggml_get_f32_1d(f, 0); - set_element(x[i], k, xm); + ggml_set_f32_1d(x[i], k, xm); ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); const float f1 = ggml_get_f32_1d(f, 0); const float g0 = (f0 - f1)/(2.0f*eps); - set_element(x[i], k, x0); + ggml_set_f32_1d(x[i], k, x0); // compute gradient using backward graph ggml_graph_reset (&gf); @@ -261,7 +292,7 @@ bool check_gradient( ggml_graph_compute_with_ctx(ctx0, &gb, n_threads); - const float g1 = get_element(x[i]->grad, k); + const float g1 = ggml_get_f32_1d(x[i]->grad, k); const float error_abs = fabsf(g0 - g1); const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0; @@ -392,19 +423,35 @@ int main(int argc, const char ** argv) { struct ggml_tensor * x[MAX_NARGS]; - // add + // add f32 { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); - check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); + check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); + } + } + + // add f16 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); + + check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f); } } @@ -414,7 +461,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -430,7 +477,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -446,7 +493,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -462,7 +509,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -478,7 +525,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -494,7 +541,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -510,7 +557,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -527,7 +574,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -537,6 +584,40 @@ int main(int argc, const char ** argv) { } } + // mean, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0])); + + check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // argmax + if (0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0])); + + check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // repeat { int64_t ne2[4]; @@ -549,15 +630,36 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 2; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1])))); check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); } + } + // repeat back + { + int64_t ne2[4]; + get_random_dims(ne2, 4); + + ne2[0] = ne[0] * ne2[0]; + ne2[1] = ne[1] * ne2[1]; + ne2[2] = 1; + ne2[3] = 1; + + const int nargs = 1; + for (int ndims = 1; ndims <= 2; ++ndims) { + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0])))); + + check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); + } } // abs (finite differences do not work) @@ -566,7 +668,7 @@ int main(int argc, const char ** argv) { // for (int ndims = 1; ndims <= 2; ++ndims) { // for (int i = 0; i < nargs; ++i) { - // x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + // x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); // ggml_set_param(ctx0, x[i]); // } @@ -576,17 +678,82 @@ int main(int argc, const char ** argv) { // } //} + // sgn + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0])); + + check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // neg + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0])); + + check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // step + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0])); + + check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // tanh, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0])); + + check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // mul_mat { const int nargs = 2; for (int ndims = 2; ndims <= 2; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); { int64_t ne2[4]; get_random_dims(ne2, 4); ne2[0] = ne[0]; - x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); } ggml_set_param(ctx0, x[0]); @@ -602,13 +769,63 @@ int main(int argc, const char ** argv) { } } + // elu, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0])); + + check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // relu + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0])); + + check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // gelu, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0])); + + check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // silu { const int nargs = 1; for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -629,7 +846,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -647,8 +864,8 @@ int main(int argc, const char ** argv) { ne2[0] = 1; for (int ndims = 1; ndims <= 2; ++ndims) { - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); ggml_set_param(ctx0, x[1]); @@ -659,20 +876,37 @@ int main(int argc, const char ** argv) { } } - // cpy + // cpy f32 { const int nargs = 2; for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); - check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // cpy f16 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); + + check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); } } @@ -689,8 +923,8 @@ int main(int argc, const char ** argv) { for (int i = 0; i < ndims; ++i) { ne2[0] *= ne[i]; } - x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -712,8 +946,8 @@ int main(int argc, const char ** argv) { for (int i = 0; i < ndims; ++i) { ne2[0] *= ne[i]; } - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -729,7 +963,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 1); @@ -737,7 +971,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 1); } - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); @@ -758,7 +992,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 2; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 2); @@ -766,7 +1000,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 2); } - x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -790,7 +1024,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 3; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 3); @@ -798,7 +1032,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 3); } - x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -824,7 +1058,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 4; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 4); @@ -832,7 +1066,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); } - x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -858,7 +1092,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 1); @@ -866,7 +1100,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 1); } - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); @@ -887,7 +1121,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 2; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 2); @@ -895,7 +1129,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 2); } - x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -915,7 +1149,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -941,7 +1175,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); get_random_dims(ne2, 2); while (ne2[0]*ne2[1] > ggml_nelements(x[0])) { @@ -971,7 +1205,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); get_random_dims(ne2, 3); while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) { @@ -1010,7 +1244,7 @@ int main(int argc, const char ** argv) { for (int i=ndims; i<4; ++i) { ne2[i] = 1; } - x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1043,7 +1277,7 @@ int main(int argc, const char ** argv) { for (int i=ndims; i<4; ++i) { ne2[i] = 1; } - x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1060,8 +1294,8 @@ int main(int argc, const char ** argv) { int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1}; const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]); ggml_set_param(ctx0, x[0]); @@ -1075,7 +1309,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); int n_past = irand(ne[0]); @@ -1090,7 +1324,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); int n_past = irand(ne[0]); @@ -1108,7 +1342,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); for (int ndims = 1; ndims <= 3; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0])); @@ -1125,8 +1359,8 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); for (int ndims = 1; ndims <= 3; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1])); @@ -1136,7 +1370,7 @@ int main(int argc, const char ** argv) { } } - // rope + // rope f32 { const int nargs = 1; @@ -1148,7 +1382,7 @@ int main(int argc, const char ** argv) { for (int ndims = 3; ndims <= 4; ++ndims) { for (int mode = 0; mode < 4; ++mode) { for (int n_past = 1; n_past < ne2[2]; ++n_past) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1163,14 +1397,48 @@ int main(int argc, const char ** argv) { struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); - GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); - check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); + GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); + check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); } } } } - // flash_attn + // rope f16 + { + const int nargs = 1; + + int64_t ne2[4]; + get_random_dims(ne2, 4); + ne2[0] += ne2[0] % 2; + int n_rot = ne2[0]; + + for (int ndims = 3; ndims <= 4; ++ndims) { + for (int mode = 0; mode < 4; ++mode) { + for (int n_past = 1; n_past < ne2[2]; ++n_past) { + x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f); + + ggml_set_param(ctx0, x[0]); + + const bool skip_past = (mode & 1); + if (skip_past) { + // we have no past, so this would have to work on uninitialized memory. + // we only test the gradients here; + // skip_past should have no influence on gradient computation. + // so when other modes work, we assume that this does as well. + continue; + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); + + GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); + check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); + } + } + } + } + + // flash_attn f32 { const int nargs = 3; @@ -1196,16 +1464,57 @@ int main(int argc, const char ** argv) { nek[3] = 1; nev[3] = 1; } - x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f); - x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f); - x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f); + x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f); + x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f); + x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f); ggml_set_param(ctx0, x[0]); ggml_set_param(ctx0, x[1]); ggml_set_param(ctx0, x[2]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); - check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); + check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); + } + } + } + + // flash_attn f16, not yet fully implemented + if(0) + { + const int nargs = 3; + + int64_t ne2[4]; + + get_random_dims(ne2, 4); + int64_t D = ne2[0]; + int64_t N = ne2[1]; + int64_t M = ne2[2] + N; + int64_t B = ne2[3]; + + for (int masked = 0; masked <= 1; ++masked) { + for (int ndims = 2; ndims <= 4; ++ndims) { + int64_t neq[4] = { D, N, B, ne[3] }; + int64_t nek[4] = { D, M, B, ne[3] }; + int64_t nev[4] = { M, D, B, ne[3] }; + if (ndims == 2) { + neq[2] = 1; neq[3] = 1; + nek[2] = 1; nek[3] = 1; + nev[2] = 1; nev[3] = 1; + } else if (ndims == 3) { + neq[3] = 1; + nek[3] = 1; + nev[3] = 1; + } + x[0] = get_random_tensor_f16(ctx0, ndims, neq, -0.1250f, 0.1250f); + x[1] = get_random_tensor_f16(ctx0, ndims, nek, -0.1250f, 0.1250f); + x[2] = get_random_tensor_f16(ctx0, ndims, nev, -0.1250f, 0.1250f); + ggml_set_param(ctx0, x[0]); + ggml_set_param(ctx0, x[1]); + ggml_set_param(ctx0, x[2]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); + + check_gradient("flash_attn f16", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); } } } diff --git a/tests/test-opt.c b/tests/test-opt.c index 5531814c4..4eef62bcf 100644 --- a/tests/test-opt.c +++ b/tests/test-opt.c @@ -125,9 +125,9 @@ int main(void) { }; struct ggml_context * ctx = ggml_init(params); - int64_t ne1[4] = {4, 1024, 1, 1}; - int64_t ne2[4] = {4, 2048, 1, 1};; - int64_t ne3[4] = {1024, 2048, 1, 1}; + int64_t ne1[4] = {4, 128, 1, 1}; + int64_t ne2[4] = {4, 256, 1, 1};; + int64_t ne3[4] = {128, 256, 1, 1}; struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1); struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1);