From 92b1bbd2ec43c82ec0530ba3c8758846c5790c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Mon, 28 Aug 2023 13:23:55 +0200 Subject: [PATCH] CUDA: fix RoPE asserts, block sizes (#2833) --- ggml-cuda.cu | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index d76a25dc2..5fd625630 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -4908,8 +4908,8 @@ static void scale_f32_cuda(const float * x, float * dst, const float scale, cons static void rope_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, const float p0, const float p_delta, const int p_delta_rows, const float theta_scale, cudaStream_t stream) { - GGML_ASSERT(nrows % 2 == 0); // GG: is this assert really needed? I don't see why - const dim3 block_dims(1, 2*CUDA_ROPE_BLOCK_SIZE, 1); + GGML_ASSERT(ncols % 2 == 0); + const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1); const int num_blocks_x = (ncols + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE); const dim3 block_nums(nrows, num_blocks_x, 1); rope_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols, p0, p_delta, p_delta_rows, theta_scale); @@ -4917,7 +4917,8 @@ static void rope_f32_cuda(const float * x, float * dst, const int ncols, const i static void rope_neox_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, const float p0, const float p_delta, const int p_delta_rows, const float theta_scale, cudaStream_t stream) { - const dim3 block_dims(1, 2*CUDA_ROPE_BLOCK_SIZE, 1); + GGML_ASSERT(ncols % 2 == 0); + const dim3 block_dims(1, CUDA_ROPE_BLOCK_SIZE, 1); const int num_blocks_x = (ncols + 2*CUDA_ROPE_BLOCK_SIZE - 1) / (2*CUDA_ROPE_BLOCK_SIZE); const dim3 block_nums(nrows, num_blocks_x, 1); rope_neox_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols, p0, p_delta, p_delta_rows, theta_scale);