diff --git a/ggml.c b/ggml.c index 23c5e69..0fe1f4b 100644 --- a/ggml.c +++ b/ggml.c @@ -1576,9 +1576,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) { uint16_t t; for (int i = 0; i < n; ++i) { - ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]); - memcpy(&t, &fp16, sizeof(uint16_t)); - y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]); + if (x[i] <= -10.0f) { + y[i] = 0.0f; + } else if (x[i] >= 10.0f) { + y[i] = x[i]; + } else { + ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]); + memcpy(&t, &fp16, sizeof(uint16_t)); + y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]); + } } } #else @@ -5746,11 +5752,13 @@ struct ggml_tensor * ggml_pool_1d( is_node = true; } - const int64_t ne[2] = { + const int64_t ne[4] = { ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), a->ne[1], + a->ne[2], + a->ne[3], }; - struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); + struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); int32_t params[] = { op, k0, s0, p0 }; ggml_set_op_params(result, params, sizeof(params)); @@ -15031,9 +15039,10 @@ static void ggml_compute_forward_map_custom1( return; } - struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params; + struct ggml_map_custom1_op_params p; + memcpy(&p, dst->op_params, sizeof(p)); - p->fun(dst, a, params->ith, params->nth, p->userdata); + p.fun(dst, a, params->ith, params->nth, p.userdata); } // ggml_compute_forward_map_custom2 @@ -15049,9 +15058,10 @@ static void ggml_compute_forward_map_custom2( return; } - struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params; + struct ggml_map_custom2_op_params p; + memcpy(&p, dst->op_params, sizeof(p)); - p->fun(dst, a, b, params->ith, params->nth, p->userdata); + p.fun(dst, a, b, params->ith, params->nth, p.userdata); } // ggml_compute_forward_map_custom3 @@ -15068,9 +15078,10 @@ static void ggml_compute_forward_map_custom3( return; } - struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params; + struct ggml_map_custom3_op_params p; + memcpy(&p, dst->op_params, sizeof(p)); - p->fun(dst, a, b, c, params->ith, params->nth, p->userdata); + p.fun(dst, a, b, c, params->ith, params->nth, p.userdata); } // ggml_compute_forward_cross_entropy_loss @@ -17336,29 +17347,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { } break; case GGML_OP_MAP_CUSTOM1: { - struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params; - if (p->n_tasks == GGML_N_TASKS_MAX) { + struct ggml_map_custom1_op_params p; + memcpy(&p, node->op_params, sizeof(p)); + if (p.n_tasks == GGML_N_TASKS_MAX) { n_tasks = n_threads; } else { - n_tasks = MIN(p->n_tasks, n_threads); + n_tasks = MIN(p.n_tasks, n_threads); } } break; case GGML_OP_MAP_CUSTOM2: { - struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params; - if (p->n_tasks == GGML_N_TASKS_MAX) { + struct ggml_map_custom2_op_params p; + memcpy(&p, node->op_params, sizeof(p)); + if (p.n_tasks == GGML_N_TASKS_MAX) { n_tasks = n_threads; } else { - n_tasks = MIN(p->n_tasks, n_threads); + n_tasks = MIN(p.n_tasks, n_threads); } } break; case GGML_OP_MAP_CUSTOM3: { - struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params; - if (p->n_tasks == GGML_N_TASKS_MAX) { + struct ggml_map_custom3_op_params p; + memcpy(&p, node->op_params, sizeof(p)); + if (p.n_tasks == GGML_N_TASKS_MAX) { n_tasks = n_threads; } else { - n_tasks = MIN(p->n_tasks, n_threads); + n_tasks = MIN(p.n_tasks, n_threads); } } break; case GGML_OP_CROSS_ENTROPY_LOSS: