ggml : sync latest (new ops, macros, refactoring) (#2106)

- add ggml_argmax()
- add ggml_tanh()
- add ggml_elu()
- refactor ggml_conv_1d() and variants
- refactor ggml_conv_2d() and variants
- add helper macros to reduce code duplication in ggml.c
This commit is contained in:
Georgi Gerganov 2023-07-04 21:54:11 +03:00 committed by GitHub
parent f257fd2550
commit ed9a54e512
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 589 additions and 1052 deletions

1512
ggml.c

File diff suppressed because it is too large Load diff

118
ggml.h
View file

@ -201,6 +201,8 @@
#define GGML_MAX_NAME 48 #define GGML_MAX_NAME 48
#define GGML_DEFAULT_N_THREADS 4 #define GGML_DEFAULT_N_THREADS 4
#define GGML_UNUSED(x) (void)(x)
#define GGML_ASSERT(x) \ #define GGML_ASSERT(x) \
do { \ do { \
if (!(x)) { \ if (!(x)) { \
@ -209,6 +211,30 @@
} \ } \
} while (0) } while (0)
// used to copy the number of elements and stride in bytes of tensors into local variables.
// main purpose is to reduce code duplication and improve readability.
//
// example:
//
// GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
// GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
//
#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
const type prefix##0 = (pointer)->array[0]; \
GGML_UNUSED(prefix##0);
#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
const type prefix##1 = (pointer)->array[1]; \
GGML_UNUSED(prefix##1);
#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
const type prefix##2 = (pointer)->array[2]; \
GGML_UNUSED(prefix##2);
#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
const type prefix##3 = (pointer)->array[3]; \
GGML_UNUSED(prefix##3);
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
@ -295,12 +321,15 @@ extern "C" {
GGML_OP_SUM, GGML_OP_SUM,
GGML_OP_SUM_ROWS, GGML_OP_SUM_ROWS,
GGML_OP_MEAN, GGML_OP_MEAN,
GGML_OP_ARGMAX,
GGML_OP_REPEAT, GGML_OP_REPEAT,
GGML_OP_REPEAT_BACK, GGML_OP_REPEAT_BACK,
GGML_OP_ABS, GGML_OP_ABS,
GGML_OP_SGN, GGML_OP_SGN,
GGML_OP_NEG, GGML_OP_NEG,
GGML_OP_STEP, GGML_OP_STEP,
GGML_OP_TANH,
GGML_OP_ELU,
GGML_OP_RELU, GGML_OP_RELU,
GGML_OP_GELU, GGML_OP_GELU,
GGML_OP_GELU_QUICK, GGML_OP_GELU_QUICK,
@ -332,9 +361,8 @@ extern "C" {
GGML_OP_ROPE_BACK, GGML_OP_ROPE_BACK,
GGML_OP_ALIBI, GGML_OP_ALIBI,
GGML_OP_CLAMP, GGML_OP_CLAMP,
GGML_OP_CONV_1D_S1_PH, GGML_OP_CONV_1D,
GGML_OP_CONV_1D_S2_PH, GGML_OP_CONV_2D,
GGML_OP_CONV_2D_SK_P0,
GGML_OP_FLASH_ATTN, GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF, GGML_OP_FLASH_FF,
@ -690,6 +718,11 @@ extern "C" {
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a); struct ggml_tensor * a);
// argmax along rows
GGML_API struct ggml_tensor * ggml_argmax(
struct ggml_context * ctx,
struct ggml_tensor * a);
// if a is the same shape as b, and a is not parameter, return a // if a is the same shape as b, and a is not parameter, return a
// otherwise, return a new tensor: repeat(a) to fit in b // otherwise, return a new tensor: repeat(a) to fit in b
GGML_API struct ggml_tensor * ggml_repeat( GGML_API struct ggml_tensor * ggml_repeat(
@ -734,6 +767,22 @@ extern "C" {
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a); struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_tanh(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_tanh_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_elu(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_elu_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);
GGML_API struct ggml_tensor * ggml_relu( GGML_API struct ggml_tensor * ggml_relu(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a); struct ggml_tensor * a);
@ -1084,58 +1133,33 @@ extern "C" {
float min, float min,
float max); float max);
// TODO: implement general-purpose convolutions GGML_API struct ggml_tensor * ggml_conv_1d(
// GGML_API struct ggml_tensor * ggml_conv_1d(
// struct ggml_context * ctx,
// struct ggml_tensor * a,
// struct ggml_tensor * b,
// int s0
// int p0,
// int d0);
//
// GGML_API struct ggml_tensor * ggml_conv_2d(
// struct ggml_context * ctx,
// struct ggml_tensor * a,
// struct ggml_tensor * b,
// int s0,
// int s1,
// int p0,
// int p1,
// int d0,
// int d1);
// padding = half
// TODO: we don't support extra parameters for now
// that's why we are hard-coding the stride, padding, and dilation
// not great ..
// example:
// a: 3 80 768 1
// b: 3000 80 1 1
// res: 3000 768 1 1
// used in whisper
GGML_API struct ggml_tensor * ggml_conv_1d_s1_ph(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b); struct ggml_tensor * b,
int s0, // stride
int p0, // padding
int d0); // dilation
// used in whisper GGML_API struct ggml_tensor * ggml_conv_2d(
GGML_API struct ggml_tensor * ggml_conv_1d_s2_ph(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b); struct ggml_tensor * b,
int s0,
int s1,
int p0,
int p1,
int d0,
int d1);
// kernel size is a->ne[0] x a->ne[1] // conv_1d with padding = half
// stride is equal to kernel size // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
// padding is zero GGML_API struct ggml_tensor* ggml_conv_1d_ph(
// example:
// a: 16 16 3 768
// b: 1024 1024 3 1
// res: 64 64 768 1
// used in sam
GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b); struct ggml_tensor * b,
int s,
int d);
GGML_API struct ggml_tensor * ggml_flash_attn( GGML_API struct ggml_tensor * ggml_flash_attn(
struct ggml_context * ctx, struct ggml_context * ctx,

View file

@ -1,6 +1,11 @@
#!/bin/bash #!/bin/bash
cp -rpv ../ggml/src/ggml.c ./ggml.c cp -rpv ../ggml/src/ggml.c ./ggml.c
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h