From 61b9192f27f58dd21fb2c1a5572daded9e0ecefb Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 15 Jan 2024 13:27:00 +0200 Subject: [PATCH] cuda : fix dequantize kernel names (llama/4938) --- ggml-cuda.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index a870718..c3e14bc 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -6309,14 +6309,14 @@ static void dequantize_row_q3_K_cuda(const void * vx, dst_t * y, const int k, cu } template -static void dequantize_q4_0_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) { +static void dequantize_row_q4_0_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) { const int nb32 = k / 32; const int nb = (k + 255) / 256; dequantize_block_q4_0<<>>(vx, y, nb32); } template -static void dequantize_q4_1_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) { +static void dequantize_row_q4_1_cuda(const void * vx, dst_t * y, const int k, cudaStream_t stream) { const int nb32 = k / 32; const int nb = (k + 255) / 256; dequantize_block_q4_1<<>>(vx, y, nb32); @@ -6370,9 +6370,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) { int id; switch (type) { case GGML_TYPE_Q4_0: - return dequantize_q4_0_cuda; + return dequantize_row_q4_0_cuda; case GGML_TYPE_Q4_1: - return dequantize_q4_1_cuda; + return dequantize_row_q4_1_cuda; case GGML_TYPE_Q5_0: return dequantize_block_cuda; case GGML_TYPE_Q5_1: @@ -6407,9 +6407,9 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) { static to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) { switch (type) { case GGML_TYPE_Q4_0: - return dequantize_q4_0_cuda; + return dequantize_row_q4_0_cuda; case GGML_TYPE_Q4_1: - return dequantize_q4_1_cuda; + return dequantize_row_q4_1_cuda; case GGML_TYPE_Q5_0: return dequantize_block_cuda; case GGML_TYPE_Q5_1: