ggml : use ggml_row_size where possible (#4472)

* ggml : use ggml_row_size where possible

ggml-ci

* ggml : move ggml_nbytes_split to ggml-cuda.cu
Author: slaren, 2023-12-14 20:05:21 +01:00 (committed by GitHub)
commit 6744dbe924, parent cafcd4f895
5 changed files with 24 additions and 26 deletions
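For context, ggml_row_size(type, ne) returns the number of bytes occupied by ne elements of a given type, accounting for block-quantized types. A minimal sketch of the helper, based on how it is used throughout this diff (the in-tree implementation may differ in detail):

```c
// Sketch only: what ggml_row_size computes.
// ggml_type_size(type) is the byte size of one block and ggml_blck_size(type)
// the number of elements per block (1 for non-quantized types).
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
    assert(ne % ggml_blck_size(type) == 0); // element count must be block-aligned
    return ggml_type_size(type)*ne/ggml_blck_size(type);
}
```

Every hand-written ne*ggml_type_size(t)/ggml_blck_size(t) expression below is replaced by a call to this helper, which also makes the block-alignment requirement explicit.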

ggml-cuda.cu

@@ -8898,6 +8898,12 @@ static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, gg
     (void) dst;
 }
 
+static size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
+    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+
+    return nrows_split*ggml_row_size(tensor->type, tensor->ne[0]);
+}
+
 void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
     const int64_t nrows = ggml_nrows(tensor);
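ggml_nbytes_split is only needed by the CUDA backend, which uses it to size the slice of a tensor assigned to one GPU when rows are split across devices, hence the move out of ggml.c. With the helper, the quantity is simply rows times row size; a worked example with assumed values (Q4_0: 32-element blocks of 18 bytes):

```c
// Illustrative only: bytes held by one GPU's share of a Q4_0 matrix with
// 4096 columns when 1024 of its rows land on that device.
size_t row_bytes   = ggml_row_size(GGML_TYPE_Q4_0, 4096); // 4096/32 * 18 = 2304
size_t split_bytes = 1024 * row_bytes;                    // 2359296 bytes (2.25 MiB)
```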
@@ -8947,8 +8953,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
     // pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses
     if (ne0 % MATRIX_ROW_PADDING != 0) {
-        size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
-            * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
+        size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
     }
 
     char * buf;
@@ -9485,8 +9490,7 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t
     if (ggml_is_quantized(tensor->type)) {
         if (ne0 % MATRIX_ROW_PADDING != 0) {
-            size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
-                * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
+            size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
         }
     }
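Both CUDA hunks size the extra bytes needed to pad the last row of a quantized tensor up to a multiple of MATRIX_ROW_PADDING elements (512 here, per the comment) so that reads of whole padded rows stay in bounds. A worked example with assumed values:

```c
// Illustrative padding arithmetic, assuming Q4_0 (32-element, 18-byte blocks)
// and a hypothetical row length of 11008 elements (not a multiple of 512).
const int64_t ne0 = 11008;
const int64_t pad = 512 - ne0 % 512;               // 256 trailing elements
size_t extra = ggml_row_size(GGML_TYPE_Q4_0, pad); // 256/32 * 18 = 144 bytes
```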

ggml.c (18 changed lines)

@@ -1997,12 +1997,6 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
     return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
 }
 
-size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return (nrows_split*tensor->ne[0]*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type);
-}
-
 int ggml_blck_size(enum ggml_type type) {
     return type_traits[type].blck_size;
 }
@@ -2491,7 +2485,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         view_src = view_src->view_src;
     }
 
-    size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
+    size_t data_size = ggml_row_size(type, ne[0]);
     for (int i = 1; i < n_dims; i++) {
         data_size *= ne[i];
     }
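In ggml_new_tensor_impl the byte size of a contiguous tensor is now the row size of the first dimension times the remaining dimensions, which is the same value ggml_nbytes yields for a contiguous tensor. A quick numeric check under assumed sizes:

```c
// Assumed example: contiguous F32 tensor of shape [4096, 32, 1, 1].
size_t data_size = ggml_row_size(GGML_TYPE_F32, 4096); // 4096 * 4 = 16384 bytes per row
data_size *= 32;                                       // 524288 bytes total
data_size *= 1;
data_size *= 1;                                        // matches ggml_nbytes for this tensor
```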
@@ -9698,7 +9692,7 @@ static void ggml_compute_forward_mul_mat(
     if (params->type == GGML_TASK_INIT) {
         if (src1->type != vec_dot_type) {
             char * wdata = params->wdata;
-            const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
+            const size_t row_size = ggml_row_size(vec_dot_type, ne10);
 
             assert(params->wsize >= ne11*ne12*ne13*row_size);
             assert(src1->type == GGML_TYPE_F32);
@@ -9721,7 +9715,7 @@ static void ggml_compute_forward_mul_mat(
     }
 
     const void * wdata    = (src1->type == vec_dot_type) ? src1->data : params->wdata;
-    const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
+    const size_t row_size = ggml_row_size(vec_dot_type, ne10);
 
     const int64_t nr0 = ne01;           // src0 rows
     const int64_t nr1 = cne1*ne12*ne13; // src1 rows
@@ -16326,7 +16320,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                    } else
#endif
                    if (node->src[1]->type != vec_dot_type) {
-                        cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
+                        cur = ggml_row_size(vec_dot_type, ggml_nelements(node->src[1]));
                    }
                } break;
            case GGML_OP_MUL_MAT_ID:
@@ -16343,7 +16337,7 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                    } else
#endif
                    if (b->type != vec_dot_type) {
-                        cur = ggml_type_size(vec_dot_type)*ggml_nelements(b)/ggml_blck_size(vec_dot_type);
+                        cur = ggml_row_size(vec_dot_type, ggml_nelements(b));
                    }
                } break;
            case GGML_OP_OUT_PROD:
@@ -18703,7 +18697,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                return NULL;
            }
 
-            const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
+            const size_t size_cur = ggml_row_size(info->type, ne);
 
            ctx->size += GGML_PAD(size_cur, ctx->alignment);
        }
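In the matrix-multiply path, when src1 is not already in vec_dot_type, each of its rows is converted into the scratch buffer wdata before the dot products run, so the conversion loop and the work-size estimate in ggml_graph_plan need the same per-row byte count (the graph-plan hunks compute the equivalent total from ggml_nelements). A sketch of the sizing, with names taken from the hunks above:

```c
// Sketch: scratch space needed to hold src1 converted to vec_dot_type.
// ne10 is src1's row length; ne11*ne12*ne13 is its number of rows.
const size_t row_size = ggml_row_size(vec_dot_type, ne10);
const size_t needed   = ne11*ne12*ne13*row_size; // compared against params->wsize above
```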

ggml.h (1 changed line)

@@ -638,7 +638,6 @@ extern "C" {
     GGML_API int64_t ggml_nrows       (const struct ggml_tensor * tensor);
     GGML_API size_t  ggml_nbytes      (const struct ggml_tensor * tensor);
     GGML_API size_t  ggml_nbytes_pad  (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
-    GGML_API size_t  ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
 
     GGML_API int     ggml_blck_size(enum ggml_type type);
     GGML_API size_t  ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
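Since ggml_nbytes_split disappears from the public header, out-of-tree callers can reproduce it with the helpers that remain. A hedged drop-in (the function name here is hypothetical):

```c
// Hypothetical replacement for the removed public ggml_nbytes_split(),
// written only against functions still declared in ggml.h.
static size_t nbytes_split_compat(const struct ggml_tensor * tensor, int nrows_split) {
    return (size_t) nrows_split * ggml_row_size(tensor->type, tensor->ne[0]);
}
```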

tests/test-backend-ops.cpp

@@ -54,7 +54,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
     } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
         GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
-        std::vector<uint8_t> dataq(ggml_type_size(tensor->type)*size/ggml_blck_size(tensor->type));
+        std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
         int64_t hist[16];
         ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size, hist);
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
@@ -72,6 +72,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
     ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
     size_t bs = ggml_blck_size(t->type);
+    std::vector<float> vq(ggml_blck_size(t->type));
+    bool quantized = ggml_is_quantized(t->type);
 
     // access elements by index to avoid gaps in views
     for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
@@ -85,9 +87,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                         tv.push_back(*(float *) &buf[i]);
                     } else if (t->type == GGML_TYPE_I32) {
                         tv.push_back((float)*(int32_t *) &buf[i]);
-                    } else if (ggml_is_quantized(t->type)) {
-                        std::vector<float> vq(ggml_blck_size(t->type));
-                        tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
+                    } else if (quantized) {
+                        tt.to_float(&buf[i], vq.data(), bs);
                         tv.insert(tv.end(), vq.begin(), vq.end());
                     } else {
                         GGML_ASSERT(false);
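The test change hoists the scratch vector vq and the ggml_is_quantized check out of the innermost loop, so the per-block work reduces to one to_float call through the type traits. A plain-C sketch of that per-block conversion (t, buf, and i are assumed to come from the surrounding loop):

```c
// Sketch of dequantizing one block via the type traits, as in the loop above.
// t is the tensor, buf its raw bytes, i the byte offset of one block.
ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
const int bs = ggml_blck_size(t->type);    // elements per block
float * vq = malloc(bs * sizeof(float));   // allocated once, reused for every block
tt.to_float(&buf[i], vq, bs);              // expand bs quantized values to floats
// ... consume vq ..., then free(vq) after the loop
```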

tests/test-quantize-perf.cpp

@@ -286,7 +286,7 @@ int main(int argc, char * argv[]) {
                    qfns.from_float_reference(test_data1, test_q1, size);
                    return test_q1[0];
                };
-                size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+                size_t quantized_size = ggml_row_size(type, size);
                benchmark_function(size, quantized_size, iterations, quantize_fn);
            }
            printf("\n");
@@ -300,7 +300,7 @@ int main(int argc, char * argv[]) {
                    qfns.from_float(test_data1, test_q1, size);
                    return test_q1[0];
                };
-                size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+                size_t quantized_size = ggml_row_size(type, size);
                benchmark_function(size, quantized_size, iterations, quantize_fn);
            }
            printf("\n");
@@ -315,7 +315,7 @@ int main(int argc, char * argv[]) {
                    qfns.to_float(test_q1, test_out, size);
                    return test_out[0];
                };
-                size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+                size_t quantized_size = ggml_row_size(type, size);
                benchmark_function(size, quantized_size, iterations, quantize_fn);
            }
            printf("\n");
@@ -330,7 +330,7 @@ int main(int argc, char * argv[]) {
                    vdot.from_float(test_data1, test_q1, size);
                    return test_q1[0];
                };
-                size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+                size_t quantized_size = ggml_row_size(type, size);
                benchmark_function(size, quantized_size, iterations, quantize_fn);
            }
            printf("\n");
@@ -347,7 +347,7 @@ int main(int argc, char * argv[]) {
                    qfns.vec_dot(size, &result, test_q1, test_q2);
                    return result;
                };
-                size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
+                size_t quantized_size = ggml_row_size(type, size);
                benchmark_function(size, quantized_size, iterations, quantize_fn);
            }
            printf("\n");
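In every benchmark the quantized buffer size was spelled size / ggml_blck_size(type) * ggml_type_size(type); dividing first only works because size is a multiple of the block size, a precondition ggml_row_size makes explicit (see the sketch at the top of this commit). A numeric check with assumed inputs (Q8_0: 32-element, 34-byte blocks):

```c
// Assumed benchmark input: 4096 test values quantized to Q8_0.
size_t quantized_size = ggml_row_size(GGML_TYPE_Q8_0, 4096); // 4096/32 * 34 = 4352 bytes
// old spelling: 4096 / 32 * 34 == 4352 as well, since 4096 is block-aligned
```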