From c3ca7a5f0546c561eb278be3f2fe335795679e01 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sat, 29 Apr 2023 21:34:23 +0300
Subject: [PATCH] ggml : fix 32-bit ARM NEON

---
 ggml.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/ggml.c b/ggml.c
index c9f0f09ea..4d53b4628 100644
--- a/ggml.c
+++ b/ggml.c
@@ -668,6 +668,51 @@ uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
     return vget_high_u8(vcombine_u8(a, b));
 }
 
+// 32-bit ARM NEON lacks the AArch64-only vzip1q/vzip2q intrinsics, but it does
+// provide vzipq_*, which returns both interleaved halves at once:
+//   val[0] = { a0, b0, a1, b1, ..., a7,  b7  }  (what vzip1q returns)
+//   val[1] = { a8, b8, a9, b9, ..., a15, b15 }  (what vzip2q returns)
+
+// Interleave the low 8 lanes of a and b: { a0, b0, ..., a7, b7 }.
+int8x16_t vzip1q_s8(int8x16_t a, int8x16_t b) {
+    const int8x16x2_t zipped = vzipq_s8(a, b);
+    return zipped.val[0];
+}
+
+// Interleave the high 8 lanes of a and b: { a8, b8, ..., a15, b15 }.
+int8x16_t vzip2q_s8(int8x16_t a, int8x16_t b) {
+    const int8x16x2_t zipped = vzipq_s8(a, b);
+    return zipped.val[1];
+}
+
+// Unsigned counterpart of vzip1q_s8.
+uint8x16_t vzip1q_u8(uint8x16_t a, uint8x16_t b) {
+    const uint8x16x2_t zipped = vzipq_u8(a, b);
+    return zipped.val[0];
+}
+
+// Unsigned counterpart of vzip2q_s8.
+uint8x16_t vzip2q_u8(uint8x16_t a, uint8x16_t b) {
+    const uint8x16x2_t zipped = vzipq_u8(a, b);
+    return zipped.val[1];
+}
+
+// Convert each float lane to int32, rounding to nearest with ties to even as
+// the AArch64 vcvtnq_s32_f32 does (roundf would round ties away from zero).
+// rintf follows the current rounding mode, so this assumes the default
+// round-to-nearest mode is in effect. Unlike the hardware conversion, lanes
+// outside the int32_t range are not saturated.
+int32x4_t vcvtnq_s32_f32(float32x4_t v) {
+    const int32_t lanes[4] = {
+        (int32_t) rintf(vgetq_lane_f32(v, 0)),
+        (int32_t) rintf(vgetq_lane_f32(v, 1)),
+        (int32_t) rintf(vgetq_lane_f32(v, 2)),
+        (int32_t) rintf(vgetq_lane_f32(v, 3)),
+    };
+
+    return vld1q_s32(lanes);
+}
+
 #endif
 #endif
 