From 5ec1e0edfaf34422dd4a660ca0064a40fc5fa135 Mon Sep 17 00:00:00 2001 From: slaren Date: Mon, 19 Feb 2024 09:04:45 +0100 Subject: [PATCH] cuda, metal : fix nans in soft_max (llama/5574) * cuda : fix nans in soft_max * metal : fix nans in soft_max --------- Co-authored-by: Georgi Gerganov --- ggml-cuda.cu | 8 ++++---- ggml-metal.metal | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 933ebbc..eef2135 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -6205,7 +6205,7 @@ static __global__ void soft_max_f32(const float * x, const float * mask, const f const int ix = rowx*ncols + col; const int iy = rowy*ncols + col; - const float val = x[ix]*scale + (mask ? mask[iy] : 0.0f) + slope*pos[col]; + const float val = x[ix]*scale + (mask ? mask[iy] : 0.0f) + (pos ? slope*pos[col] : 0.0f); vals[col] = val; max_val = max(max_val, val); @@ -9170,17 +9170,17 @@ static void ggml_cuda_op_soft_max( memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float)); // positions tensor - float * src2_dd = dst_dd; // default to avoid null checks in the kernel + float * src2_dd = nullptr; cuda_pool_alloc src2_f; ggml_tensor * src2 = dst->src[2]; const bool use_src2 = src2 != nullptr; if (use_src2) { - const bool src2_on_device = use_src2 && src2->backend == GGML_BACKEND_GPU; - ggml_tensor_extra_gpu * src2_extra = use_src2 ? (ggml_tensor_extra_gpu *) src2->extra : nullptr; + const bool src2_on_device = src2->backend == GGML_BACKEND_GPU; if (src2_on_device) { + ggml_tensor_extra_gpu * src2_extra = (ggml_tensor_extra_gpu *) src2->extra; src2_dd = (float *) src2_extra->data_device[g_main_device]; } else { src2_dd = src2_f.alloc(ggml_nelements(src2)); diff --git a/ggml-metal.metal b/ggml-metal.metal index d0a85a1..f0d77d4 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -392,7 +392,7 @@ kernel void kernel_soft_max( float lmax = -INFINITY; for (int i00 = tpitg; i00 < ne00; i00 += ntg) { - lmax = MAX(lmax, psrc0[i00]*scale + (pmask ? pmask[i00] : 0.0f) + slope*ppos[i00]); + lmax = MAX(lmax, psrc0[i00]*scale + (pmask ? pmask[i00] : 0.0f) + (ppos ? slope*ppos[i00] : 0.0f)); } // find the max value in the block @@ -417,7 +417,7 @@ kernel void kernel_soft_max( // parallel sum float lsum = 0.0f; for (int i00 = tpitg; i00 < ne00; i00 += ntg) { - const float exp_psrc0 = exp((psrc0[i00]*scale + (pmask ? pmask[i00] : 0.0f) + slope*ppos[i00]) - max_val); + const float exp_psrc0 = exp((psrc0[i00]*scale + (pmask ? pmask[i00] : 0.0f) + (ppos ? slope*ppos[i00] : 0.0f)) - max_val); lsum += exp_psrc0; pdst[i00] = exp_psrc0; } @@ -495,7 +495,7 @@ kernel void kernel_soft_max_4( float4 lmax4 = -INFINITY; for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { - lmax4 = fmax(lmax4, psrc4[i00]*scale + (pmask ? pmask[i00] : 0.0f) + slope*ppos[i00]); + lmax4 = fmax(lmax4, psrc4[i00]*scale + (pmask ? pmask[i00] : 0.0f) + (ppos ? slope*ppos[i00] : 0.0f)); } const float lmax = MAX(MAX(lmax4[0], lmax4[1]), MAX(lmax4[2], lmax4[3])); @@ -521,7 +521,7 @@ kernel void kernel_soft_max_4( // parallel sum float4 lsum4 = 0.0f; for (int i00 = tpitg; i00 < ne00/4; i00 += ntg) { - const float4 exp_psrc4 = exp((psrc4[i00]*scale + (pmask ? pmask[i00] : 0.0f) + slope*ppos[i00]) - max_val); + const float4 exp_psrc4 = exp((psrc4[i00]*scale + (pmask ? pmask[i00] : 0.0f) + (ppos ? slope*ppos[i00] : 0.0f)) - max_val); lsum4 += exp_psrc4; pdst4[i00] = exp_psrc4; }