CUDA: fixed row rounding for 0 tensor splits (#4594)

This commit is contained in:
Johannes Gäßler 2023-12-23 09:16:33 +01:00 committed by GitHub
parent 7082d24cec
commit e0a4002273
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -7937,12 +7937,16 @@ static void ggml_cuda_op_mul_mat(
 if (id != 0) {
 row_low[id] = ne01*g_tensor_split[id];
-row_low[id] -= row_low[id] % rounding;
+if (row_low[id] < ne01) {
+row_low[id] -= row_low[id] % rounding;
+}
 }
 if (id != g_device_count - 1) {
 row_high[id] = ne01*g_tensor_split[id + 1];
-row_high[id] -= row_high[id] % rounding;
+if (row_high[id] < ne01) {
+row_high[id] -= row_high[id] % rounding;
+}
 }
 }
 }